Python2/PyMuPDF: src/__init__.py

comparison src/__init__.py @ 1:1d09e1dec1d9 upstream

ADD: PyMuPDF v1.26.4: the original sdist. It does not yet contain MuPDF. This normally will be downloaded when building PyMuPDF.

author	Franz Glasner <fzglas.hg@dom66.de>
date	Mon, 15 Sep 2025 11:37:51 +0200
parents
children	d77477b4e151 a6bc019ac0b2

comparison

equal deleted inserted replaced

--1:000000000000
+:1d09e1dec1d9
+'''
+PyMuPDF implemented on top of MuPDF Python bindings.
+License:
+SPDX-License-Identifier: GPL-3.0-only
+'''
+# To reduce startup times, we don't import everything we require here.
+#
+import atexit
+import binascii
+import collections
+import inspect
+import io
+import math
+import os
+import pathlib
+import glob
+import re
+import string
+import sys
+import tarfile
+import time
+import typing
+import warnings
+import weakref
+import zipfile
+from . import extra
+# Set up g_out_log and g_out_message from environment variables.
+#
+# PYMUPDF_MESSAGE controls the destination of user messages (from function
+# `pymupdf.message()`).
+#
+# PYMUPDF_LOG controls the destination of internal development logging (from
+# function `pymupdf.log()`).
+#
+# For syntax, see _make_output()'s `text` arg.
+#
+def _make_output(
+*,
+text=None,
+fd=None,
+stream=None,
+path=None,
+path_append=None,
+pylogging=None,
+pylogging_logger=None,
+pylogging_level=None,
+pylogging_name=None,
+default=None,
+):
+'''
+Returns a stream that writes to a specified destination, which can be a
+file descriptor, a file, an existing stream or Python's `logging' system.
+Args:
+text: text specification of destination.
+fd:<int> - write to file descriptor.
+path:<str> - write to file.
+path+:<str> - append to file.
+logging:<items> - write to Python `logging` module.
+items: comma-separated <name=value> pairs.
+level=<int>
+name=<str>.
+Other names are ignored.
+fd: an int file descriptor.
+stream: something with methods .write(text) and .flush().
+If specified we simply return <stream>.
+path: a file path.
+If specified we return a stream that writes to this file.
+path_append: a file path.
+If specified we return a stream that appends to this file.
+pylogging*:
+if any of these args is not None, we return a stream that writes to
+Python's `logging` module.
+pylogging:
+Unused other than to activate use of logging module.
+pylogging_logger:
+A logging.Logger; If None, set from <pylogging_name>.
+pylogging_level:
+An int log level, if None we use
+pylogging_logger.getEffectiveLevel().
+pylogging_name:
+Only used if <pylogging_logger> is None:
+If <pylogging_name> is None, we set it to 'pymupdf'.
+Then we do: pylogging_logger = logging.getLogger(pylogging_name)
+'''
+if text is not None:
+# Textual specification, for example from from environment variable.
+if text.startswith('fd:'):
+fd = int(text[3:])
+elif text.startswith('path:'):
+path = text[5:]
+elif text.startswith('path+'):
+path_append = text[5:]
+elif text.startswith('logging:'):
+pylogging = True
+items_d = dict()
+items = text[8:].split(',')
+#items_d = {n: v for (n, v) in [item.split('=', 1) for item in items]}
+for item in items:
+if not item:
+continue
+nv = item.split('=', 1)
+assert len(nv) == 2, f'Need `=` in {item=}.'
+n, v = nv
+items_d[n] = v
+pylogging_level = items_d.get('level')
+if pylogging_level is not None:
+pylogging_level = int(pylogging_level)
+pylogging_name = items_d.get('name', 'pymupdf')
+else:
+assert 0, f'Expected prefix `fd:`, `path:`. `path+:` or `logging:` in {text=}.'
+if fd is not None:
+ret = open(fd, mode='w', closefd=False)
+elif stream is not None:
+assert hasattr(stream, 'write')
+assert hasattr(stream, 'flush')
+ret = stream
+elif path is not None:
+ret = open(path, 'w')
+elif path_append is not None:
+ret = open(path_append, 'a')
+elif (0
+or pylogging is not None
+or pylogging_logger is not None
+or pylogging_level is not None
+or pylogging_name is not None
+):
+import logging
+if pylogging_logger is None:
+if pylogging_name is None:
+pylogging_name = 'pymupdf'
+pylogging_logger = logging.getLogger(pylogging_name)
+assert isinstance(pylogging_logger, logging.Logger)
+if pylogging_level is None:
+pylogging_level = pylogging_logger.getEffectiveLevel()
+class Out:
+def write(self, text):
+# `logging` module appends newlines, but so does the `print()`
+# functions in our caller message() and log() fns, so we need to
+# remove them here.
+text = text.rstrip('\n')
+if text:
+pylogging_logger.log(pylogging_level, text)
+def flush(self):
+pass
+ret = Out()
+else:
+ret = default
+return ret
# Stream used by PyMuPDF messaging; $PYMUPDF_MESSAGE selects the destination,
# otherwise stdout is used.
_g_out_message = _make_output(default=sys.stdout, text=os.environ.get('PYMUPDF_MESSAGE'))

# Stream used by PyMuPDF development/debugging logging; $PYMUPDF_LOG selects
# the destination, otherwise stdout is used.
_g_out_log = _make_output(default=sys.stdout, text=os.environ.get('PYMUPDF_LOG'))

# Things for testing logging.
_g_log_items_active = False
_g_log_items = []
def _log_items():
    '''
    Returns the module-level list `_g_log_items` (the list itself, not a
    copy).
    '''
    return _g_log_items
def _log_items_active(active):
    '''
    Sets the module-level flag `_g_log_items_active` to `active`.
    '''
    global _g_log_items_active
    _g_log_items_active = active
def _log_items_clear():
    '''
    Empties the module-level list `_g_log_items` in place.
    '''
    _g_log_items.clear()
def set_messages(
        *,
        text=None,
        fd=None,
        stream=None,
        path=None,
        path_append=None,
        pylogging=None,
        pylogging_logger=None,
        pylogging_level=None,
        pylogging_name=None,
        ):
    '''
    Sets destination of PyMuPDF messages.

    All args are passed unchanged to _make_output(), see there for details.
    The current destination is passed as the default, so it is kept if no
    arg selects a new one.
    '''
    global _g_out_message
    destination = dict(
            text=text,
            fd=fd,
            stream=stream,
            path=path,
            path_append=path_append,
            pylogging=pylogging,
            pylogging_logger=pylogging_logger,
            pylogging_level=pylogging_level,
            pylogging_name=pylogging_name,
            )
    _g_out_message = _make_output(default=_g_out_message, **destination)
def set_log(
        *,
        text=None,
        fd=None,
        stream=None,
        path=None,
        path_append=None,
        pylogging=None,
        pylogging_logger=None,
        pylogging_level=None,
        pylogging_name=None,
        ):
    '''
    Sets destination of PyMuPDF development/debugging logging.

    All args are passed unchanged to _make_output(), see there for details.
    The current destination is passed as the default, so it is kept if no
    arg selects a new one.
    '''
    global _g_out_log
    destination = dict(
            text=text,
            fd=fd,
            stream=stream,
            path=path,
            path_append=path_append,
            pylogging=pylogging,
            pylogging_logger=pylogging_logger,
            pylogging_level=pylogging_level,
            pylogging_name=pylogging_name,
            )
    _g_out_log = _make_output(default=_g_out_log, **destination)
def log( text='', caller=1):
    '''
    For development/debugging diagnostics.

    Prefixes `text` with `<file>:<line>:<function>(): ` taken from the stack
    entry `caller` levels above this function, appends the result to the
    test capture list if capturing is active, and prints it to the log
    stream if one is set.
    '''
    try:
        frames = inspect.stack(context=0)
    except StopIteration:
        # No stack information; output `text` without a location prefix.
        frames = None
    if frames is not None:
        record = frames[caller]
        try:
            filename = os.path.relpath(record.filename)
        except Exception:   # Can fail on windows.
            filename = record.filename
        line = record.lineno
        function = record.function
        text = f'{filename}:{line}:{function}(): {text}'
    if _g_log_items_active:
        _g_log_items.append(text)
    if _g_out_log:
        print(text, file=_g_out_log, flush=True)
def message(text=''):
    '''
    For user messages.

    Prints `text` to the current message stream; does nothing if that stream
    is false.
    '''
    # It looks like `print()` does nothing if sys.stdout is None (without
    # raising an exception), but we don't rely on this.
    if not _g_out_message:
        return
    print(text, file=_g_out_message, flush=True)
def exception_info():
    '''
    Writes a header line and then traceback.format_exc() to log().
    '''
    import traceback
    for text in ('exception_info:', traceback.format_exc()):
        log(text)
# PDF names must not contain these characters: whitespace, the delimiters
# below and the NUL character.
INVALID_NAME_CHARS = set(string.whitespace) | set("()<>[]{}/%") | {chr(0)}
def get_env_bool( name, default):
    '''
    Returns `True`, `False` or `default` depending on whether $<name> is '1',
    '0' or unset. Otherwise assert-fails.

    Calls log() if the result differs from `default`.
    '''
    v = os.environ.get( name)
    if v == '1':
        setting = True
    elif v == '0':
        setting = False
    elif v is None:
        setting = default
    else:
        assert 0, f'Unrecognised value for {name}: {v!r}'
    if setting != default:
        log(f'Using non-default setting from {name}: {v!r}')
    return setting
def get_env_int( name, default):
    '''
    Returns $<name> converted with int(), or `default` if $<name> is unset.

    Calls log() if the result differs from `default`. Raises ValueError (from
    int()) if $<name> is set but is not an integer.
    '''
    v = os.environ.get( name)
    value = default if v is None else int(v)
    if value != default:
        log(f'Using non-default setting from {name}: {v}')
    return value
# All our `except ...` blocks output diagnostics if `g_exceptions_verbose` is
# true. $PYMUPDF_EXCEPTIONS_VERBOSE overrides the default of 1.
g_exceptions_verbose = get_env_int(name='PYMUPDF_EXCEPTIONS_VERBOSE', default=1)

# $PYMUPDF_USE_EXTRA overrides whether to use optimised C fns in `extra`; they
# are used by default.
g_use_extra = get_env_bool(name='PYMUPDF_USE_EXTRA', default=True)
+# Global switches
+#
+class _Globals:
+def __init__(self):
+self.no_device_caching = 0
+self.small_glyph_heights = 0
+self.subset_fontnames = 0
+self.skip_quad_corrections = 0
+_globals = _Globals()
# Optionally use MuPDF via cppyy bindings; experimental and not tested recently
# as of 2023-01-20 11:51:40
#
# $MUPDF_CPPYY unset: use the SWIG bindings (the `else` branch below).
# $MUPDF_CPPYY empty: `import mupdf_cppyy` from the normal module search path.
# Otherwise: load the file named by $MUPDF_CPPYY as module `mupdf_cppyy`.
#
mupdf_cppyy = os.environ.get( 'MUPDF_CPPYY')
if mupdf_cppyy is not None:
    # pylint: disable=all
    log( f'{__file__}: $MUPDF_CPPYY={mupdf_cppyy!r} so attempting to import mupdf_cppyy.')
    # Note: this lookup raises KeyError if $PYTHONPATH is not set.
    log( f'{__file__}: $PYTHONPATH={os.environ["PYTHONPATH"]}')
    if mupdf_cppyy == '':
        import mupdf_cppyy
    else:
        import importlib
        mupdf_cppyy = importlib.machinery.SourceFileLoader(
                'mupdf_cppyy',
                mupdf_cppyy
                ).load_module()
    mupdf = mupdf_cppyy.cppyy.gbl.mupdf
else:
    # Use MuPDF Python SWIG bindings. We allow import from either our own
    # directory for conventional wheel installs, or from separate place in case
    # we are using a separately-installed system installation of mupdf.
    #
    try:
        from . import mupdf
    except Exception:
        import mupdf
    # internal_check_ndebug() is only called if the bindings provide it.
    if hasattr(mupdf, 'internal_check_ndebug'):
        mupdf.internal_check_ndebug()
    mupdf.reinit_singlethreaded()
+def _int_rc(text):
+'''
+Converts string to int, ignoring trailing 'rc...'.
+'''
+rc = text.find('rc')
+if rc >= 0:
+text = text[:rc]
+return int(text)
+# Basic version information.
+#
+# (We use `noqa F401` to avoid flake8 errors such as `F401
+# '._build.mupdf_location' imported but unused`.
+#
+from ._build import mupdf_location      # noqa F401
+from ._build import pymupdf_git_branch  # noqa F401
+from ._build import pymupdf_git_diff    # noqa F401
+from ._build import pymupdf_git_sha     # noqa F401
+from ._build import pymupdf_version     # noqa F401
+from ._build import swig_version        # noqa F401
+from ._build import swig_version_tuple  # noqa F401
mupdf_version = mupdf.FZ_VERSION

# Removed in PyMuPDF-1.26.1.
pymupdf_date = None

# Versions as tuples; useful when comparing versions. Each dot-separated
# component is converted with _int_rc().
#
pymupdf_version_tuple = tuple(map(_int_rc, pymupdf_version.split('.')))
mupdf_version_tuple = tuple(map(_int_rc, mupdf_version.split('.')))

# The version string must agree with the separate numeric version constants.
assert mupdf_version_tuple == (mupdf.FZ_VERSION_MAJOR, mupdf.FZ_VERSION_MINOR, mupdf.FZ_VERSION_PATCH), \
        f'Inconsistent MuPDF version numbers: {mupdf_version_tuple=} != {(mupdf.FZ_VERSION_MAJOR, mupdf.FZ_VERSION_MINOR, mupdf.FZ_VERSION_PATCH)=}'

# Legacy version information.
#
VersionBind = pymupdf_version
VersionFitz = mupdf_version
VersionDate = None
version = (VersionBind, VersionFitz, VersionDate)
+# String formatting.
+def _format_g(value, *, fmt='%g'):
+'''
+Returns `value` formatted with mupdf.fz_format_double() if available,
+otherwise with Python's `%`.
+If `value` is a list or tuple, we return a space-separated string of
+formatted values.
+'''
+if isinstance(value, (list, tuple)):
+ret = ''
+for v in value:
+if ret:
+ret += ' '
+ret += _format_g(v, fmt=fmt)
+return ret
+else:
+return mupdf.fz_format_double(fmt, value)
+format_g = _format_g
# ByteString is gone from typing in 3.14.
# collections.abc.Buffer available from 3.12 only
try:
    ByteString = typing.ByteString
except AttributeError:
    ByteString = bytes | bytearray | memoryview

# Names required by class method typing annotations.
OptBytes = typing.Union[ByteString, None]
OptDict = typing.Union[dict, None]
OptFloat = typing.Union[float, None]
OptInt = typing.Optional[int]
OptSeq = typing.Union[typing.Sequence, None]
OptStr = typing.Union[str, None]

# String placeholders for names that are not defined at this point.
Page = 'Page_forward_decl'
Point = 'Point_forward_decl'

matrix_like = 'matrix_like'
point_like = 'point_like'
quad_like = 'quad_like'
rect_like = 'rect_like'
def _as_fz_document(document):
    '''
    Returns document as a mupdf.FzDocument, upcasting as required. Raises
    'document closed' exception if closed.

    Accepts a Document, mupdf.FzDocument or mupdf.PdfDocument; anything else
    assert-fails.
    '''
    if isinstance(document, Document):
        if document.is_closed:
            raise ValueError('document closed')
        document = document.this
    if isinstance(document, mupdf.FzDocument):
        return document
    if isinstance(document, mupdf.PdfDocument):
        return document.super()
    assert document is not None, 'document is None'
    assert 0, f'Unrecognised {type(document)=}'
def _as_pdf_document(document, required=True):
    '''
    Returns `document` downcast to a mupdf.PdfDocument. If downcast fails (i.e.
    `document` is not actually a `PdfDocument`) then we assert-fail if `required`
    is true (the default) else return a `mupdf.PdfDocument` with `.m_internal`
    false.

    Raises ValueError('document closed') for a closed Document.
    '''
    if isinstance(document, Document):
        if document.is_closed:
            raise ValueError('document closed')
        document = document.this
    if isinstance(document, mupdf.PdfDocument):
        return document
    if isinstance(document, mupdf.FzDocument):
        pdf = mupdf.PdfDocument(document)
        assert not required or pdf.m_internal
        return pdf
    assert document is not None, 'document is None'
    assert 0, f'Unrecognised {type(document)=}'
def _as_fz_page(page):
    '''
    Returns page as a mupdf.FzPage, upcasting as required.

    Accepts a Page, mupdf.PdfPage or mupdf.FzPage; anything else assert-fails.
    '''
    if isinstance(page, Page):
        page = page.this
    if isinstance(page, mupdf.PdfPage):
        return page.super()
    if isinstance(page, mupdf.FzPage):
        return page
    assert page is not None, 'page is None'
    assert 0, f'Unrecognised {type(page)=}'
def _as_pdf_page(page, required=True):
    '''
    Returns `page` downcast to a mupdf.PdfPage. If downcast fails (i.e. `page`
    is not actually a `PdfPage`) then we assert-fail if `required` is true (the
    default) else return a `mupdf.PdfPage` with `.m_internal` false.
    '''
    if isinstance(page, Page):
        page = page.this
    if isinstance(page, mupdf.PdfPage):
        return page
    if isinstance(page, mupdf.FzPage):
        pdf_page = mupdf.pdf_page_from_fz_page(page)
        assert not required or pdf_page.m_internal
        return pdf_page
    assert page is not None, 'page is None'
    assert 0, f'Unrecognised {type(page)=}'
def _pdf_annot_page(annot):
    '''
    Wrapper for mupdf.pdf_annot_page() which raises an exception if <annot>
    is not bound to a page instead of returning a mupdf.PdfPage with
    `.m_internal=None`.

    [Some other MuPDF functions such as `pdf_update_annot()` already raise a
    similar exception if a pdf_annot's .page field is null.]
    '''
    page = mupdf.pdf_annot_page(annot)
    if page.m_internal:
        return page
    raise RuntimeError('Annot is not bound to a page')
# Fixme: we don't support JM_MEMORY=1, so this switch is fixed at 0.
JM_MEMORY = 0
+# Classes
+#
+class Annot:
def __init__(self, annot):
    '''
    Stores `annot`, which must be a mupdf.PdfAnnot, as `self.this`.
    '''
    assert isinstance( annot, mupdf.PdfAnnot)
    self.this = annot
def __repr__(self):
    '''
    Returns "'<type name>' annotation on <parent>"; the parent is shown as
    '<>' if attribute `parent` is not set.
    '''
    owner = getattr(self, 'parent', '<>')
    type_name = self.type[1]
    return "'%s' annotation on %s" % (type_name, str(owner))
def __str__(self):
    '''
    Returns the same text as __repr__().
    '''
    text = self.__repr__()
    return text
def _erase(self):
    '''
    Sets `self.thisown` to False if it exists and is true; otherwise does
    nothing.
    '''
    if not getattr(self, "thisown", False):
        return
    self.thisown = False
def _get_redact_values(self):
    '''
    Returns a dict describing this redaction annotation: overlay text,
    alignment, rect, text color, font name, font size and fill color (plus
    the xref of an ignored /RO entry, if present).

    Returns None if this is not a redaction annotation or if reading the
    annotation object raises an exception.
    '''
    annot = self.this
    if mupdf.pdf_annot_type(annot) != mupdf.PDF_ANNOT_REDACT:
        return

    values = dict()
    try:
        annot_obj = mupdf.pdf_annot_obj(annot)

        ro = mupdf.pdf_dict_gets(annot_obj, "RO")
        if ro.m_internal:
            message_warning("Ignoring redaction key '/RO'.")
            values[dictkey_xref] = mupdf.pdf_to_num(ro)

        overlay = mupdf.pdf_dict_gets(annot_obj, "OverlayText")
        if overlay.m_internal:
            values[dictkey_text] = JM_UnicodeFromStr(mupdf.pdf_to_text_string(overlay))
        else:
            values[dictkey_text] = ''

        quadding = mupdf.pdf_dict_get(annot_obj, PDF_NAME('Q'))
        values[dictkey_align] = mupdf.pdf_to_int(quadding) if quadding.m_internal else 0
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        return

    if not values:
        return values
    values["rect"] = self.rect
    text_color, fontname, fontsize = TOOLS._parse_da(self)
    values["text_color"] = text_color
    values["fontname"] = fontname
    values["fontsize"] = fontsize
    values["fill"] = self.colors["fill"]
    return values
def _getAP(self):
    '''
    Returns the content of the annotation's /AP/N appearance stream.

    With `g_use_extra` the result comes from extra.Annot_getAP() and is
    bytes. Otherwise returns None if /AP/N is not a stream or loading gives
    no buffer.
    '''
    annot = self.this
    assert isinstance( annot, mupdf.PdfAnnot)
    if g_use_extra:
        content = extra.Annot_getAP(annot)
        assert isinstance( content, bytes)
        return content

    normal_ap = mupdf.pdf_dict_getl(
            mupdf.pdf_annot_obj( annot),
            PDF_NAME('AP'),
            PDF_NAME('N'),
            )
    if not mupdf.pdf_is_stream( normal_ap):
        return None
    buffer_ = mupdf.pdf_load_stream( normal_ap)
    if buffer_ and buffer_.m_internal:
        return JM_BinFromBuffer(buffer_)
    return None
def _setAP(self, buffer_, rect=0):
    '''
    Replaces the content of the annotation's /AP/N appearance stream with
    `buffer_`. If `rect` is true, also copies the annotation's /Rect to the
    stream's /BBox.

    Exceptions are not propagated: they are reported with exception_info()
    if `g_exceptions_verbose` is true and otherwise dropped.
    '''
    try:
        annot = self.this
        annot_obj = mupdf.pdf_annot_obj( annot)
        page = _pdf_annot_page(annot)
        apobj = mupdf.pdf_dict_getl( annot_obj, PDF_NAME('AP'), PDF_NAME('N'))
        # /AP/N must exist and be a stream.
        if not (apobj.m_internal and mupdf.pdf_is_stream( apobj)):
            raise RuntimeError( MSG_BAD_APN)
        content = JM_BufferFromBytes( buffer_)
        if not content.m_internal:
            raise ValueError( MSG_BAD_BUFFER)
        JM_update_stream( page.doc(), apobj, content, 1)
        if rect:
            annot_rect = mupdf.pdf_dict_get_rect( annot_obj, PDF_NAME('Rect'))
            mupdf.pdf_dict_put_rect( apobj, PDF_NAME('BBox'), annot_rect)
    except Exception:
        if g_exceptions_verbose:
            exception_info()
def _update_appearance(self, opacity=-1, blend_mode=None, fill_color=None, rotate=-1):
    '''
    Updates fill color and rotation of the annotation, lets MuPDF update the
    annotation, then optionally stores opacity and blend mode.

    Args:
        opacity: stored only if 0 <= opacity < 1.
        blend_mode: stored if true.
        fill_color: converted with JM_color_FromSequence().
        rotate: written to /Rotate only if >= 0 and the annotation type is
            one of those listed below.
    Returns:
        True. Exceptions are reported with message() and re-raised.
    '''
    annot = self.this
    assert annot.m_internal
    annot_obj = mupdf.pdf_annot_obj( annot)
    page = _pdf_annot_page(annot)
    pdf = page.doc()
    type_ = mupdf.pdf_annot_type( annot)
    # nfcol is the number of color components, fcol the component values.
    nfcol, fcol = JM_color_FromSequence(fill_color)

    # Phase 1: fill color, rotation and MuPDF's own update.
    try:
        # remove fill color from unsupported annots
        # or if so requested
        if nfcol == 0 or type_ not in (
                mupdf.PDF_ANNOT_SQUARE,
                mupdf.PDF_ANNOT_CIRCLE,
                mupdf.PDF_ANNOT_LINE,
                mupdf.PDF_ANNOT_POLY_LINE,
                mupdf.PDF_ANNOT_POLYGON
                ):
            mupdf.pdf_dict_del( annot_obj, PDF_NAME('IC'))
        elif nfcol > 0:
            mupdf.pdf_set_annot_interior_color( annot, fcol[:nfcol])

        # /Rotate is only written for the annotation types listed here.
        insert_rot = 1 if rotate >= 0 else 0
        if type_ not in (
                mupdf.PDF_ANNOT_CARET,
                mupdf.PDF_ANNOT_CIRCLE,
                mupdf.PDF_ANNOT_FREE_TEXT,
                mupdf.PDF_ANNOT_FILE_ATTACHMENT,
                mupdf.PDF_ANNOT_INK,
                mupdf.PDF_ANNOT_LINE,
                mupdf.PDF_ANNOT_POLY_LINE,
                mupdf.PDF_ANNOT_POLYGON,
                mupdf.PDF_ANNOT_SQUARE,
                mupdf.PDF_ANNOT_STAMP,
                mupdf.PDF_ANNOT_TEXT,
                ):
            insert_rot = 0

        if insert_rot:
            mupdf.pdf_dict_put_int(annot_obj, PDF_NAME('Rotate'), rotate)

        # insert fill color
        if type_ == mupdf.PDF_ANNOT_FREE_TEXT:
            if nfcol > 0:
                mupdf.pdf_set_annot_color(annot, fcol[:nfcol])
        elif nfcol > 0:
            # Write the components as an array to /IC.
            col = mupdf.pdf_new_array(page.doc(), nfcol)
            for i in range( nfcol):
                mupdf.pdf_array_push_real(col, fcol[i])
            mupdf.pdf_dict_put(annot_obj, PDF_NAME('IC'), col)
        mupdf.pdf_dirty_annot(annot)
        mupdf.pdf_update_annot(annot) # let MuPDF update
        pdf.resynth_required = 0
    except Exception as e:
        if g_exceptions_verbose:
            exception_info()
        message( f'cannot update annot: {e}')
        raise

    if (opacity < 0 or opacity >= 1) and not blend_mode:    # no opacity, no blend_mode
        return True

    # Phase 2: store opacity / blend mode as graphics state 'H' in
    # /AP/N/Resources/ExtGState, and also in the annotation object itself.
    try:    # create or update /ExtGState
        ap = mupdf.pdf_dict_getl(
                mupdf.pdf_annot_obj(annot),
                PDF_NAME('AP'),
                PDF_NAME('N')
                )
        if not ap.m_internal:   # should never happen
            raise RuntimeError( MSG_BAD_APN)

        resources = mupdf.pdf_dict_get( ap, PDF_NAME('Resources'))
        if not resources.m_internal:    # no Resources yet: make one
            resources = mupdf.pdf_dict_put_dict( ap, PDF_NAME('Resources'), 2)

        alp0 = mupdf.pdf_new_dict( page.doc(), 3)
        if opacity >= 0 and opacity < 1:
            mupdf.pdf_dict_put_real( alp0, PDF_NAME('CA'), opacity)
            mupdf.pdf_dict_put_real( alp0, PDF_NAME('ca'), opacity)
            mupdf.pdf_dict_put_real( annot_obj, PDF_NAME('CA'), opacity)

        if blend_mode:
            mupdf.pdf_dict_put_name( alp0, PDF_NAME('BM'), blend_mode)
            mupdf.pdf_dict_put_name( annot_obj, PDF_NAME('BM'), blend_mode)

        extg = mupdf.pdf_dict_get( resources, PDF_NAME('ExtGState'))
        if not extg.m_internal: # no ExtGState yet: make one
            extg = mupdf.pdf_dict_put_dict( resources, PDF_NAME('ExtGState'), 2)

        mupdf.pdf_dict_put( extg, PDF_NAME('H'), alp0)

    except Exception as e:
        if g_exceptions_verbose:    exception_info()
        message( f'cannot set opacity or blend mode\n: {e}')
        raise

    return True
@property
def apn_bbox(self):
    """annotation appearance bbox

    The /BBox of /AP/N (an infinite rect if there is no /AP/N), multiplied
    by the parent page's transformation and derotation matrices.
    """
    CheckParent(self)
    ap = mupdf.pdf_dict_getl(
            mupdf.pdf_annot_obj(self.this),
            PDF_NAME('AP'),
            PDF_NAME('N'),
            )
    if ap.m_internal:
        fz_rect = mupdf.pdf_dict_get_rect(ap, PDF_NAME('BBox'))
    else:
        fz_rect = mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE)
    bbox = Rect(JM_py_from_rect(fz_rect))
    bbox = bbox * self.get_parent().transformation_matrix
    bbox *= self.get_parent().derotation_matrix
    return bbox
@property
def apn_matrix(self):
    """annotation appearance matrix

    The /Matrix of /AP/N as a Matrix; if there is no /AP/N, the conversion
    of a default-constructed mupdf.FzMatrix.
    """
    try:
        CheckParent(self)
        annot = self.this
        assert isinstance(annot, mupdf.PdfAnnot)
        ap = mupdf.pdf_dict_getl(
                mupdf.pdf_annot_obj(annot),
                mupdf.PDF_ENUM_NAME_AP,
                mupdf.PDF_ENUM_NAME_N
                )
        if not ap.m_internal:
            return JM_py_from_matrix(mupdf.FzMatrix())
        fz_matrix = mupdf.pdf_dict_get_matrix(ap, mupdf.PDF_ENUM_NAME_Matrix)
        return Matrix(JM_py_from_matrix(fz_matrix))
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        raise
@property
def blendmode(self):
    """annotation BlendMode

    Taken from the annotation's /BM entry if present, otherwise from the
    first /BM found in /AP/N/Resources/ExtGState; None if there is none.
    """
    CheckParent(self)
    annot_obj = mupdf.pdf_annot_obj(self.this)
    direct = mupdf.pdf_dict_get(annot_obj, PDF_NAME('BM'))
    if direct.m_internal:
        return JM_UnicodeFromStr(mupdf.pdf_to_name(direct))

    # loop through the /AP/N/Resources/ExtGState objects
    ext_gstates = mupdf.pdf_dict_getl(
            annot_obj,
            PDF_NAME('AP'),
            PDF_NAME('N'),
            PDF_NAME('Resources'),
            PDF_NAME('ExtGState'),
            )
    if not mupdf.pdf_is_dict(ext_gstates):
        return None
    for i in range(mupdf.pdf_dict_len(ext_gstates)):
        gstate = mupdf.pdf_dict_get_val(ext_gstates, i)
        if not mupdf.pdf_is_dict(gstate):
            continue
        for j in range(mupdf.pdf_dict_len(gstate)):
            key = mupdf.pdf_dict_get_key(gstate, j)
            if mupdf.pdf_objcmp(key, PDF_NAME('BM')) == 0:
                return JM_UnicodeFromStr(mupdf.pdf_to_name(mupdf.pdf_dict_get_val(gstate, j)))
    return None
@property
def border(self):
    """Border information.

    An empty dict for annotation types other than those listed below.
    """
    CheckParent(self)
    types_with_border = (
            mupdf.PDF_ANNOT_CIRCLE,
            mupdf.PDF_ANNOT_FREE_TEXT,
            mupdf.PDF_ANNOT_INK,
            mupdf.PDF_ANNOT_LINE,
            mupdf.PDF_ANNOT_POLY_LINE,
            mupdf.PDF_ANNOT_POLYGON,
            mupdf.PDF_ANNOT_SQUARE,
            )
    if self.type[0] not in types_with_border:
        return dict()
    return JM_annot_border(mupdf.pdf_annot_obj(self.this))
def clean_contents(self, sanitize=1):
    """Clean appearance contents stream.

    `sanitize` is passed on to the filter options.
    """
    CheckParent(self)
    annot = self.this
    options = _make_PdfFilterOptions(recurse=1, instance_forms=0, ascii=0, sanitize=sanitize)
    document = mupdf.pdf_get_bound_document(mupdf.pdf_annot_obj(annot))
    mupdf.pdf_filter_annot_contents(document, annot, options)
@property
def colors(self):
    """Color definitions.

    The result of JM_annot_colors() for the annotation object. Exceptions
    are reported if `g_exceptions_verbose` is true and then re-raised.
    """
    try:
        CheckParent(self)
        annot = self.this
        assert isinstance(annot, mupdf.PdfAnnot)
        annot_obj = mupdf.pdf_annot_obj(annot)
        return JM_annot_colors(annot_obj)
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        raise
def delete_responses(self):
    """Delete 'Popup' and responding annotations.

    Deletes every annotation found by JM_find_annot_irt(), removes the
    /Popup entry, and removes from the page's /Annots every entry whose
    /Parent is this annotation.
    """
    CheckParent(self)
    annot = self.this
    annot_obj = mupdf.pdf_annot_obj(annot)
    page = _pdf_annot_page(annot)

    # Delete responding annotations until none is found any more.
    while True:
        response = JM_find_annot_irt(annot)
        if not response:
            break
        mupdf.pdf_delete_annot(page, response)
    mupdf.pdf_dict_del(annot_obj, PDF_NAME('Popup'))

    annots = mupdf.pdf_dict_get(page.obj(), PDF_NAME('Annots'))
    changed = False
    # Go backwards so that deleting does not shift entries still to visit.
    for i in reversed(range(mupdf.pdf_array_len(annots))):
        entry = mupdf.pdf_array_get(annots, i)
        parent = mupdf.pdf_dict_get(entry, PDF_NAME('Parent'))
        if not entry.m_internal:
            continue
        if not mupdf.pdf_objcmp(parent, annot_obj):
            mupdf.pdf_array_delete(annots, i)
            changed = True

    if changed:
        mupdf.pdf_dict_put(page.obj(), PDF_NAME('Annots'), annots)
@property
def file_info(self):
    """Attached file information.

    Returns a dict with the file name (from /FS/UF, else /FS/F, else empty),
    the description (/FS/Desc), the stream /Length and /Params/Size; the
    latter two are -1 if absent.

    Raises TypeError if this is not a file attachment annotation.
    """
    CheckParent(self)
    res = dict()
    length = -1
    size = -1
    desc = None
    # Default for a file specification that has neither /UF nor /F; without
    # it `filename` would be unbound when the result is built below.
    filename = ''

    annot = self.this
    annot_obj = mupdf.pdf_annot_obj(annot)
    type_ = mupdf.pdf_annot_type(annot)
    if type_ != mupdf.PDF_ANNOT_FILE_ATTACHMENT:
        raise TypeError( MSG_BAD_ANNOT_TYPE)
    stream = mupdf.pdf_dict_getl(
            annot_obj,
            PDF_NAME('FS'),
            PDF_NAME('EF'),
            PDF_NAME('F'),
            )
    if not stream.m_internal:
        RAISEPY( "bad PDF: file entry not found", JM_Exc_FileDataError)

    fs = mupdf.pdf_dict_get(annot_obj, PDF_NAME('FS'))

    # Prefer /UF and fall back to /F.
    o = mupdf.pdf_dict_get(fs, PDF_NAME('UF'))
    if o.m_internal:
        filename = mupdf.pdf_to_text_string(o)
    else:
        o = mupdf.pdf_dict_get(fs, PDF_NAME('F'))
        if o.m_internal:
            filename = mupdf.pdf_to_text_string(o)

    o = mupdf.pdf_dict_get(fs, PDF_NAME('Desc'))
    if o.m_internal:
        desc = mupdf.pdf_to_text_string(o)

    o = mupdf.pdf_dict_get(stream, PDF_NAME('Length'))
    if o.m_internal:
        length = mupdf.pdf_to_int(o)

    o = mupdf.pdf_dict_getl(stream, PDF_NAME('Params'), PDF_NAME('Size'))
    if o.m_internal:
        size = mupdf.pdf_to_int(o)

    res[ dictkey_filename] = JM_EscapeStrFromStr(filename)
    res[ dictkey_descr] = JM_UnicodeFromStr(desc)
    res[ dictkey_length] = length
    res[ dictkey_size] = size
    return res
@property
def flags(self):
    """Flags field.

    The value of mupdf.pdf_annot_flags() for this annotation.
    """
    CheckParent(self)
    return mupdf.pdf_annot_flags(self.this)
def get_file(self):
    """Retrieve attached file content.

    Loads the stream at /FS/EF/F and converts it with JM_BinFromBuffer().
    Raises TypeError if this is not a file attachment annotation.
    """
    CheckParent(self)
    annot = self.this
    annot_obj = mupdf.pdf_annot_obj(annot)
    annot_type = mupdf.pdf_annot_type(annot)
    if annot_type != mupdf.PDF_ANNOT_FILE_ATTACHMENT:
        raise TypeError( MSG_BAD_ANNOT_TYPE)
    stream = mupdf.pdf_dict_getl(
            annot_obj,
            PDF_NAME('FS'),
            PDF_NAME('EF'),
            PDF_NAME('F'),
            )
    if not stream.m_internal:
        RAISEPY( "bad PDF: file entry not found", JM_Exc_FileDataError)
    return JM_BinFromBuffer(mupdf.pdf_load_stream(stream))
def get_oc(self):
    """Get annotation optional content reference.

    The object number of the /OC entry, or 0 if there is none.
    """
    CheckParent(self)
    annot_obj = mupdf.pdf_annot_obj(self.this)
    oc_obj = mupdf.pdf_dict_get(annot_obj, PDF_NAME('OC'))
    if not oc_obj.m_internal:
        return 0
    return mupdf.pdf_to_num(oc_obj)
# PyMuPDF doesn't seem to have this .parent member, but removing it breaks
# 11 tests...?
#@property
def get_parent(self):
    '''
    Returns `self.parent`. If that attribute does not exist yet, a Page is
    created for the page this annotation is bound to, stored as
    `self.parent` and returned.
    '''
    try:
        return self.parent
    except AttributeError:
        page = _pdf_annot_page(self.this)
        assert isinstance( page, mupdf.PdfPage)
        document = Document( page.doc()) if page.m_internal else None
        parent = Page(page, document)
        # A strong reference is stored, not a weakref proxy.
        self.parent = parent
        return parent
def get_pixmap(self, matrix=None, dpi=None, colorspace=None, alpha=0):
    """annotation Pixmap

    Args:
        matrix: transformation to use; replaced by a dpi/72 scaling if `dpi`
            is true.
        dpi: if true, also set as the dpi of the resulting Pixmap.
        colorspace: a colorspace, or one of the strings "gray", "rgb",
            "cmyk" (case-insensitive). If false, device RGB is used.
        alpha: passed on to MuPDF.
    """
    CheckParent(self)
    by_name = {"gray": csGRAY, "rgb": csRGB, "cmyk": csCMYK}
    if type(colorspace) is str:
        colorspace = by_name.get(colorspace.lower(), None)
    if dpi:
        zoom = dpi / 72
        matrix = Matrix(zoom, zoom)
    ctm = JM_matrix_from_py(matrix)
    cs = colorspace if colorspace else mupdf.fz_device_rgb()

    pixmap = Pixmap(
            mupdf.pdf_new_pixmap_from_annot(self.this, ctm, cs, mupdf.FzSeparations(0), alpha)
            )
    if dpi:
        pixmap.set_dpi(dpi, dpi)
    return pixmap
def get_sound(self):
    """Retrieve sound stream.

    Returns a dict with key 'stream' plus, where the sound object has the
    matching entry, 'rate', 'channels', 'bps', 'encoding' and 'compression'.
    Raises TypeError if this is not a sound annotation or has no /Sound.
    """
    CheckParent(self)
    annot = self.this
    annot_obj = mupdf.pdf_annot_obj(annot)
    annot_type = mupdf.pdf_annot_type(annot)
    sound = mupdf.pdf_dict_get(annot_obj, PDF_NAME('Sound'))
    if annot_type != mupdf.PDF_ANNOT_SOUND or not sound.m_internal:
        raise TypeError( MSG_BAD_ANNOT_TYPE)
    if mupdf.pdf_dict_get(sound, PDF_NAME('F')).m_internal:
        RAISEPY( "unsupported sound stream", JM_Exc_FileDataError)

    res = dict()
    # (result key, PDF name, conversion function)
    for key, pdf_key, convert in (
            ('rate', 'R', mupdf.pdf_to_real),
            ('channels', 'C', mupdf.pdf_to_int),
            ('bps', 'B', mupdf.pdf_to_int),
            ('encoding', 'E', mupdf.pdf_to_name),
            ):
        entry = mupdf.pdf_dict_get(sound, PDF_NAME(pdf_key))
        if entry.m_internal:
            res[key] = convert(entry)
    entry = mupdf.pdf_dict_gets(sound, "CO")
    if entry.m_internal:
        res['compression'] = mupdf.pdf_to_name(entry)
    res['stream'] = JM_BinFromBuffer(mupdf.pdf_load_stream(sound))
    return res
def get_textpage(self, clip=None, flags=0):
    """Make annotation TextPage.

    If `clip` is true it is set as the clip rect of the extraction options.
    The result's `parent` is a weakref proxy of this annotation's parent.
    """
    CheckParent(self)
    options = mupdf.FzStextOptions(flags)
    if clip:
        assert hasattr(mupdf, 'FZ_STEXT_CLIP_RECT'), f'MuPDF-{mupdf_version} does not support FZ_STEXT_CLIP_RECT.'
        options.clip = JM_rect_from_py(clip).internal()
        options.flags |= mupdf.FZ_STEXT_CLIP_RECT
    textpage = TextPage(mupdf.FzStextPage(self.this, options))
    parent = self.get_parent()
    # Avoid wrapping a proxy in another proxy.
    if not isinstance(parent, weakref.ProxyType):
        parent = weakref.proxy(parent)
    textpage.parent = parent
    return textpage
@property
def has_popup(self):
    """Check if annotation has a Popup.

    True if the annotation object has a /Popup entry, else False.
    """
    CheckParent(self)
    annot_obj = mupdf.pdf_annot_obj(self.this)
    popup = mupdf.pdf_dict_get(annot_obj, PDF_NAME('Popup'))
    return bool(popup.m_internal)
@property
def info(self):
    """Various information details.

    A dict with content, name, title, creation date, modification date,
    subject and id of the annotation.
    """
    CheckParent(self)
    annot = self.this
    details = dict()

    details[dictkey_content] = JM_UnicodeFromStr(mupdf.pdf_annot_contents(annot))
    annot_obj = mupdf.pdf_annot_obj(annot)

    entry = mupdf.pdf_dict_get(annot_obj, PDF_NAME('Name'))
    details[dictkey_name] = JM_UnicodeFromStr(mupdf.pdf_to_name(entry))

    # Title (= author)
    entry = mupdf.pdf_dict_get(annot_obj, PDF_NAME('T'))
    details[dictkey_title] = JM_UnicodeFromStr(mupdf.pdf_to_text_string(entry))

    # CreationDate
    entry = mupdf.pdf_dict_gets(annot_obj, "CreationDate")
    details[dictkey_creationDate] = JM_UnicodeFromStr(mupdf.pdf_to_text_string(entry))

    # ModDate
    entry = mupdf.pdf_dict_get(annot_obj, PDF_NAME('M'))
    details[dictkey_modDate] = JM_UnicodeFromStr(mupdf.pdf_to_text_string(entry))

    # Subj; stored as returned by MuPDF, without JM_UnicodeFromStr().
    entry = mupdf.pdf_dict_gets(annot_obj, "Subj")
    details[dictkey_subject] = mupdf.pdf_to_text_string(entry)

    # Identification (PDF key /NM)
    entry = mupdf.pdf_dict_gets(annot_obj, "NM")
    details[dictkey_id] = JM_UnicodeFromStr(mupdf.pdf_to_text_string(entry))

    return details
@property
def irt_xref(self):
    '''
    annotation IRT xref

    The object number of the /IRT entry, or 0 if there is none.
    '''
    irt = mupdf.pdf_dict_get( mupdf.pdf_annot_obj( self.this), PDF_NAME('IRT'))
    if irt.m_internal:
        return mupdf.pdf_to_num( irt)
    return 0
@property
def is_open(self):
    """Get 'open' status of annotation or its Popup.

    The value of mupdf.pdf_annot_is_open() for this annotation.
    """
    CheckParent(self)
    annot = self.this
    return mupdf.pdf_annot_is_open(annot)
@property
def language(self):
    """annotation language

    None if the language is unset, else its string form.
    """
    lang = mupdf.pdf_annot_language(self.this)
    if lang == mupdf.FZ_LANG_UNSET:
        return None
    assert hasattr(mupdf, 'fz_string_from_text_language2')
    return mupdf.fz_string_from_text_language2(lang)
@property
def line_ends(self):
    """Line end codes.

    A tuple (start style, end style), or None if the annotation has no line
    ending styles.
    """
    CheckParent(self)
    annot = self.this
    # return nothing for invalid annot types
    if not mupdf.pdf_annot_has_line_ending_styles(annot):
        return None
    start_style = mupdf.pdf_annot_line_start_style(annot)
    end_style = mupdf.pdf_annot_line_end_style(annot)
    return (start_style, end_style)
@property
def next(self):
    """Next annotation.

    None if there is no next annotation (or widget, if this is a widget).
    Otherwise the new object is registered in its parent's `_annot_refs`;
    a widget is returned as a filled Widget, anything else as an Annot.
    """
    CheckParent(self)
    this_annot = self.this
    assert isinstance(this_annot, mupdf.PdfAnnot)
    assert this_annot.m_internal
    if mupdf.pdf_annot_type(this_annot) == mupdf.PDF_ANNOT_WIDGET:
        successor = mupdf.pdf_next_widget(this_annot)
    else:
        successor = mupdf.pdf_next_annot(this_annot)

    val = Annot(successor) if successor.m_internal else None
    if not val:
        return None
    val.thisown = True
    # The successor must belong to the same page as this annotation.
    assert val.get_parent().this.m_internal_value() == self.get_parent().this.m_internal_value()
    val.parent._annot_refs[id(val)] = val

    if val.type[0] != mupdf.PDF_ANNOT_WIDGET:
        return val
    widget = Widget()
    TOOLS._fill_widget(val, widget)
    return widget
@property
def opacity(self):
    """Opacity.

    The /CA value of the annotation object, or -1 if it is not a number.
    """
    CheckParent(self)
    ca = mupdf.pdf_dict_get( mupdf.pdf_annot_obj(self.this), mupdf.PDF_ENUM_NAME_CA)
    if not mupdf.pdf_is_number(ca):
        return -1
    return mupdf.pdf_to_real(ca)
@property
def popup_rect(self):
    """annotation 'Popup' rectangle

    The /Rect of the /Popup entry (an infinite rect if there is no /Popup),
    multiplied by the parent page's transformation and derotation matrices.
    """
    CheckParent(self)
    annot_obj = mupdf.pdf_annot_obj( self.this)
    popup = mupdf.pdf_dict_get( annot_obj, PDF_NAME('Popup'))
    if popup.m_internal:
        fz_rect = mupdf.pdf_dict_get_rect(popup, PDF_NAME('Rect'))
    else:
        fz_rect = mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE)
    rect = Rect(JM_py_from_rect(fz_rect))
    rect = rect * self.get_parent().transformation_matrix
    rect *= self.get_parent().derotation_matrix
    return rect
@property
def popup_xref(self):
    """Return the xref of the annotation's 'Popup' object, 0 if absent."""
    CheckParent(self)
    popup = mupdf.pdf_dict_get(mupdf.pdf_annot_obj(self.this), PDF_NAME('Popup'))
    if not popup.m_internal:
        return 0
    return mupdf.pdf_to_num(popup)
@property
def rect(self):
    """Return the annotation rectangle, multiplied by the page's derotation matrix."""
    if g_use_extra:
        bound = extra.Annot_rect3(self.this)
    else:
        bound = mupdf.pdf_bound_annot(self.this)
    result = Rect(bound)
    # Original timing note: caching the parent lookup reduced 1000 calls
    # from 0.07 to 0.04.
    page = self.get_parent()
    result *= page.derotation_matrix
    return result
@property
def rect_delta(self):
    '''
    Return the annotation's /RD delta values as a 4-tuple.

    The third and fourth array items are returned negated. Returns None
    if /RD is not an array of exactly four items.
    '''
    rd = mupdf.pdf_dict_get(mupdf.pdf_annot_obj(self.this), PDF_NAME('RD'))
    if mupdf.pdf_array_len(rd) != 4:
        return None
    d0, d1, d2, d3 = (
        mupdf.pdf_to_real(mupdf.pdf_array_get(rd, index)) for index in range(4)
    )
    return (d0, d1, -d2, -d3)
@property
def rotation(self):
    """Return the /Rotate value of the annotation, or -1 if not present."""
    CheckParent(self)
    entry = mupdf.pdf_dict_get(mupdf.pdf_annot_obj(self.this), mupdf.PDF_ENUM_NAME_Rotate)
    if entry.m_internal:
        return mupdf.pdf_to_int(entry)
    return -1
def set_apn_bbox(self, bbox):
    """
    Set the /BBox of the annotation's normal appearance stream (/AP /N).

    `bbox` is first multiplied by the page's rotation matrix and the
    inverse of its transformation matrix.

    Raises:
        RuntimeError: if the annotation has no /AP /N object.
    """
    CheckParent(self)
    page = self.get_parent()
    bbox *= page.rotation_matrix * ~page.transformation_matrix
    appearance = mupdf.pdf_dict_getl(
        mupdf.pdf_annot_obj(self.this), PDF_NAME('AP'), PDF_NAME('N')
    )
    if appearance.m_internal:
        mupdf.pdf_dict_put_rect(appearance, PDF_NAME('BBox'), JM_rect_from_py(bbox))
        return
    raise RuntimeError( MSG_BAD_APN)
def set_apn_matrix(self, matrix):
    """Set the /Matrix of the annotation's normal appearance stream (/AP /N).

    Raises:
        RuntimeError: if the annotation has no /AP /N object.
    """
    CheckParent(self)
    appearance = mupdf.pdf_dict_getl(
        mupdf.pdf_annot_obj(self.this), PDF_NAME('AP'), PDF_NAME('N')
    )
    if appearance.m_internal:
        mupdf.pdf_dict_put_matrix(appearance, PDF_NAME('Matrix'), JM_matrix_from_py(matrix))
        return
    raise RuntimeError( MSG_BAD_APN)
def set_blendmode(self, blend_mode):
    """Store `blend_mode` as the /BM name of the annotation object."""
    CheckParent(self)
    mupdf.pdf_dict_put_name(mupdf.pdf_annot_obj(self.this), PDF_NAME('BM'), blend_mode)
def set_border(self, border=None, width=-1, style=None, dashes=None, clouds=-1):
    """Set border properties.

    Either a dict, or direct arguments width, style, dashes or clouds.

    Args:
        border: dict with any of the keys "width", "style", "dashes",
            "clouds". If a dict is given, the direct arguments are not
            used. The dict is copied; the caller's object is not modified.
        width: border line width (-1 or None: do not set).
        style: border style.
        dashes: sequence of integers; a sequence containing any
            non-integer item is replaced by None.
        clouds: cloudy border intensity (-1 or None: do not set).

    Returns:
        The result of JM_annot_set_border(), or None (after a message) if
        the annotation type does not support borders.
    """
    CheckParent(self)
    atype, atname = self.type[:2]  # annotation type
    border_types = (
        mupdf.PDF_ANNOT_CIRCLE,
        mupdf.PDF_ANNOT_FREE_TEXT,
        mupdf.PDF_ANNOT_INK,
        mupdf.PDF_ANNOT_LINE,
        mupdf.PDF_ANNOT_POLY_LINE,
        mupdf.PDF_ANNOT_POLYGON,
        mupdf.PDF_ANNOT_SQUARE,
    )
    cloud_types = (
        mupdf.PDF_ANNOT_CIRCLE,
        mupdf.PDF_ANNOT_FREE_TEXT,
        mupdf.PDF_ANNOT_POLYGON,
        mupdf.PDF_ANNOT_SQUARE,
    )
    if atype not in border_types:
        message(f"Cannot set border for '{atname}'.")
        return None
    # 'clouds is not None' guards the comparison: None means "do not set".
    if atype not in cloud_types and clouds is not None and clouds > 0:
        message(f"Cannot set cloudy border for '{atname}'.")
        clouds = -1  # do not set border effect

    if type(border) is dict:
        # Work on a shallow copy so the caller's dict is left untouched.
        border = dict(border)
    else:
        border = {"width": width, "style": style, "dashes": dashes, "clouds": clouds}

    border.setdefault("width", -1)
    border.setdefault("style", None)
    border.setdefault("dashes", None)
    border.setdefault("clouds", -1)
    if border["width"] is None:
        border["width"] = -1
    if border["clouds"] is None:
        border["clouds"] = -1

    dash_spec = border["dashes"]
    if hasattr(dash_spec, "__getitem__"):  # ensure sequence items are integers
        dash_spec = tuple(dash_spec)
        if not all(isinstance(item, int) for item in dash_spec):
            dash_spec = None
        border["dashes"] = dash_spec

    annot_obj = mupdf.pdf_annot_obj(self.this)
    pdf = mupdf.pdf_get_bound_document(annot_obj)
    return JM_annot_set_border(border, pdf, annot_obj)
def set_colors(self, colors=None, stroke=None, fill=None):
    """Set 'stroke' and 'fill' colors.

    Use either a dict (keys "stroke" and "fill") or the direct arguments.
    An empty list or tuple writes an empty color array. The stroke color
    goes to the /C key, the fill color to /IC. A fill color for an
    annotation type without fill support is ignored with a message.

    Raises:
        ValueError: for 'FreeText' annotations.
    """
    if self.type[0] == mupdf.PDF_ANNOT_FREE_TEXT:
        raise ValueError("cannot be used for FreeText annotations")

    CheckParent(self)
    doc = self.get_parent().parent
    if type(colors) is not dict:
        colors = {"fill": fill, "stroke": stroke}
    fill = colors.get("fill")
    stroke = colors.get("stroke")

    def write_color(key, color):
        # Store one color under `key`; None leaves the key untouched.
        if color in ([], ()):
            doc.xref_set_key(self.xref, key, "[]")
        elif color is not None:
            if hasattr(color, "__float__"):
                color = [float(color)]
            CheckColor(color)
            assert len(color) in (1, 3, 4)
            s = f"[{_format_g(color)}]"
            doc.xref_set_key(self.xref, key, s)

    write_color("C", stroke)

    fill_annots = (
        mupdf.PDF_ANNOT_CIRCLE,
        mupdf.PDF_ANNOT_SQUARE,
        mupdf.PDF_ANNOT_LINE,
        mupdf.PDF_ANNOT_POLY_LINE,
        mupdf.PDF_ANNOT_POLYGON,
        mupdf.PDF_ANNOT_REDACT,
    )
    if fill and self.type[0] not in fill_annots:
        message("Warning: fill color ignored for annot type '%s'." % self.type[1])
        return

    write_color("IC", fill)
def set_flags(self, flags):
    """Set the annotation flags to `flags`."""
    CheckParent(self)
    mupdf.pdf_set_annot_flags(self.this, flags)
def set_info(self, info=None, content=None, title=None, creationDate=None, modDate=None, subject=None):
    """Set various properties.

    If `info` is a dict, all values are taken from it (missing keys count
    as None) and the direct arguments are ignored. Only truthy values are
    written. Title, dates and subject are written only if
    pdf_annot_has_author() is true for the annotation.
    """
    CheckParent(self)
    if type(info) is dict:  # build the args from the dictionary
        content, title, creationDate, modDate, subject = (
            info.get(key, None)
            for key in ("content", "title", "creationDate", "modDate", "subject")
        )
    annot = self.this
    # use this to indicate a 'markup' annot type
    is_markup = mupdf.pdf_annot_has_author(annot)

    if content:
        mupdf.pdf_set_annot_contents(annot, content)

    if not is_markup:
        return

    if title:  # title (= author)
        mupdf.pdf_set_annot_author(annot, title)
    if creationDate:
        mupdf.pdf_dict_put_text_string(mupdf.pdf_annot_obj(annot), PDF_NAME('CreationDate'), creationDate)
    if modDate:
        mupdf.pdf_dict_put_text_string(mupdf.pdf_annot_obj(annot), PDF_NAME('M'), modDate)
    if subject:
        mupdf.pdf_dict_puts(mupdf.pdf_annot_obj(annot), "Subj", mupdf.pdf_new_text_string(subject))
def set_irt_xref(self, xref):
    '''
    Set annotation IRT xref.

    Stores an indirect reference to object `xref` under the /IRT key.

    Raises:
        ValueError: if `xref` is outside 1 .. xref_length-1, or if the
            object's /Subtype is not a known annotation type.
    '''
    # Same orphan check as every other setter of this class.
    CheckParent(self)
    annot = self.this
    annot_obj = mupdf.pdf_annot_obj(annot)
    pdf = _pdf_annot_page(annot).doc()
    if xref < 1 or xref >= mupdf.pdf_xref_len(pdf):
        raise ValueError( MSG_BAD_XREF)
    irt = mupdf.pdf_new_indirect(pdf, xref, 0)
    subt = mupdf.pdf_dict_get(irt, PDF_NAME('Subtype'))
    irt_subt = mupdf.pdf_annot_type_from_string(mupdf.pdf_to_name(subt))
    if irt_subt < 0:
        raise ValueError( MSG_IS_NO_ANNOT)
    mupdf.pdf_dict_put(annot_obj, PDF_NAME('IRT'), irt)
def set_language(self, language=None):
    """Set the annotation language; a falsy `language` unsets it."""
    CheckParent(self)
    annot = self.this
    lang = mupdf.fz_text_language_from_string(language) if language else mupdf.FZ_LANG_UNSET
    mupdf.pdf_set_annot_language(annot, lang)
def set_line_ends(self, start, end):
    """Set line end codes; only a warning is issued for unsupported annot types."""
    CheckParent(self)
    annot = self.this
    if not mupdf.pdf_annot_has_line_ending_styles(annot):
        message_warning("bad annot type for line ends")
        return
    mupdf.pdf_set_annot_line_ending_styles(annot, start, end)
def set_name(self, name):
    """Set /Name (icon) of annotation."""
    CheckParent(self)
    mupdf.pdf_dict_put_name(mupdf.pdf_annot_obj(self.this), PDF_NAME('Name'), name)
def set_oc(self, oc=0):
    """Set the annotation's /OC xref; a falsy `oc` removes the entry."""
    CheckParent(self)
    annot_obj = mupdf.pdf_annot_obj(self.this)
    if oc:
        JM_add_oc_object(mupdf.pdf_get_bound_document(annot_obj), annot_obj, oc)
    else:
        mupdf.pdf_dict_del(annot_obj, PDF_NAME('OC'))
def set_opacity(self, opacity):
    """Set opacity.

    A value rejected by _INRANGE(opacity, 0.0, 1.0) sets opacity 1.
    For an accepted value below 1.0, the page object's `transparency`
    attribute is also set to 1.
    """
    CheckParent(self)
    annot = self.this
    if _INRANGE(opacity, 0.0, 1.0):
        mupdf.pdf_set_annot_opacity(annot, opacity)
        if opacity < 1.0:
            page = _pdf_annot_page(annot)
            page.transparency = 1
    else:
        mupdf.pdf_set_annot_opacity(annot, 1)
def set_open(self, is_open):
    """Set the 'open' status of the annotation or of its Popup."""
    CheckParent(self)
    mupdf.pdf_set_annot_is_open(self.this, is_open)
def set_popup(self, rect):
    '''
    Create the annotation's 'Popup' or update its rectangle.

    `rect` is transformed with the page's rotation matrix before use.
    '''
    CheckParent(self)
    annot = self.this
    rotation = JM_rotate_page_matrix(_pdf_annot_page(annot))
    popup_rect = mupdf.fz_transform_rect(JM_rect_from_py(rect), rotation)
    mupdf.pdf_set_annot_popup(annot, popup_rect)
def set_rect(self, rect):
    """Set annotation rectangle.

    `rect` is transformed with the page's rotation matrix before use.

    Returns:
        False (after a message) if MuPDF raised while setting the
        rectangle, otherwise None.

    Raises:
        ValueError: if the transformed rectangle is empty or infinite.
    """
    CheckParent(self)
    annot = self.this
    rotation = JM_rotate_page_matrix(_pdf_annot_page(annot))
    new_rect = mupdf.fz_transform_rect(JM_rect_from_py(rect), rotation)
    if mupdf.fz_is_empty_rect(new_rect) or mupdf.fz_is_infinite_rect(new_rect):
        raise ValueError( MSG_BAD_RECT)
    try:
        mupdf.pdf_set_annot_rect(annot, new_rect)
    except Exception as e:
        message(f'cannot set rect: {e}')
        return False
def set_rotation(self, rotate=0):
    """Set annotation rotation.

    Does nothing for annotation types outside the supported list. The
    angle is normalized into 0 <= angle < 360; for 'FreeText' an angle
    that is not a multiple of 90 is replaced by 0.
    """
    CheckParent(self)
    annot = self.this
    annot_type = mupdf.pdf_annot_type(annot)
    rotatable = (
        mupdf.PDF_ANNOT_CARET,
        mupdf.PDF_ANNOT_CIRCLE,
        mupdf.PDF_ANNOT_FREE_TEXT,
        mupdf.PDF_ANNOT_FILE_ATTACHMENT,
        mupdf.PDF_ANNOT_INK,
        mupdf.PDF_ANNOT_LINE,
        mupdf.PDF_ANNOT_POLY_LINE,
        mupdf.PDF_ANNOT_POLYGON,
        mupdf.PDF_ANNOT_SQUARE,
        mupdf.PDF_ANNOT_STAMP,
        mupdf.PDF_ANNOT_TEXT,
    )
    if annot_type not in rotatable:
        return
    angle = rotate
    while angle < 0:
        angle += 360
    while angle >= 360:
        angle -= 360
    if annot_type == mupdf.PDF_ANNOT_FREE_TEXT and angle % 90 != 0:
        angle = 0
    mupdf.pdf_dict_put_int(mupdf.pdf_annot_obj(annot), PDF_NAME('Rotate'), angle)
@property
def type(self):
    """Annotation type.

    Returns:
        (number, name) or, if the annotation has an /IT (intent) entry
        that is a PDF name, (number, name, intent).
        The string 'null' if the underlying annotation is not set.
    """
    CheckParent(self)
    if not self.this.m_internal:
        return 'null'
    type_ = mupdf.pdf_annot_type(self.this)
    c = mupdf.pdf_string_from_annot_type(type_)
    o = mupdf.pdf_dict_gets(mupdf.pdf_annot_obj(self.this), 'IT')
    # Only a name-valued /IT carries an intent. The previous condition
    # lacked the 'not' and so dropped every valid intent.
    if not o.m_internal or not mupdf.pdf_is_name(o):
        return (type_, c)
    it = mupdf.pdf_to_name(o)
    return (type_, c, it)
def update(self,
        blend_mode: OptStr =None,
        opacity: OptFloat =None,
        fontsize: float =0,
        fontname: OptStr =None,
        text_color: OptSeq =None,
        border_color: OptSeq =None,
        fill_color: OptSeq =None,
        cross_out: bool =True,
        rotate: int =-1,
        ):
    """Update annot appearance.

    Notes:
        Depending on the annot type, some parameters make no sense,
        while others are only available in this method to achieve the
        desired result. This is especially true for 'FreeText' annots.

    Args:
        blend_mode: set the blend mode, all annotations.
        opacity: set the opacity, all annotations.
        fontsize: set fontsize, 'FreeText' only.
        fontname: set the font, 'FreeText' only.
        border_color: set border color, 'FreeText' only.
        text_color: set text color, 'FreeText' only.
        fill_color: set fill color, all annotations.
        cross_out: draw diagonal lines, 'Redact' only.
        rotate: set rotation, 'FreeText' and some others.

    Raises:
        ValueError: if border_color is given but the annotation has no
            /RC (rich text) entry.
        RuntimeError: if the appearance update reports failure.
    """
    annot_obj = mupdf.pdf_annot_obj(self.this)

    if border_color:
        is_rich_text = mupdf.pdf_dict_get(annot_obj, PDF_NAME("RC"))
        if not is_rich_text:
            raise ValueError("cannot set border_color if rich_text is False")
    Annot.update_timing_test()
    CheckParent(self)

    def color_string(cs, code):
        """Return valid PDF color operator for a given color sequence.
        """
        cc = ColorCode(cs, code)
        if not cc:
            return b""
        return (cc + "\n").encode()

    annot_type = self.type[0]  # get the annot type

    dt = self.border.get("dashes", None)  # get the dashes spec
    bwidth = self.border.get("width", -1)  # get border line width
    stroke = self.colors["stroke"]  # get the stroke color
    if fill_color is not None:
        fill = fill_color
    else:
        fill = self.colors["fill"]
    rect = None  # self.rect  # prevent MuPDF fiddling with it
    apnmat = self.apn_matrix  # prevent MuPDF fiddling with it
    if rotate != -1:  # sanitize rotation value
        while rotate < 0:
            rotate += 360
        while rotate >= 360:
            rotate -= 360
        if annot_type == mupdf.PDF_ANNOT_FREE_TEXT and rotate % 90 != 0:
            rotate = 0

    #------------------------------------------------------------------
    # handle opacity and blend mode
    #------------------------------------------------------------------
    if blend_mode is None:
        blend_mode = self.blendmode
    if not hasattr(opacity, "__float__"):
        opacity = self.opacity

    if 0 <= opacity < 1 or blend_mode:
        opa_code = "/H gs\n"  # then we must reference this 'gs'
    else:
        opa_code = ""

    if annot_type == mupdf.PDF_ANNOT_FREE_TEXT:
        CheckColor(text_color)
        CheckColor(fill_color)
        tcol, fname, fsize = TOOLS._parse_da(self)

        # read and update default appearance as necessary
        if fsize <= 0:
            fsize = 12
        if text_color:
            tcol = text_color
        if fontname:
            fname = fontname
        if fontsize > 0:
            fsize = fontsize
        JM_make_annot_DA(self, len(tcol), tcol, fname, fsize)
        blend_mode = None  # not supported for free text annotations!

    #------------------------------------------------------------------
    # now invoke MuPDF to update the annot appearance
    #------------------------------------------------------------------
    val = self._update_appearance(
        opacity=opacity,
        blend_mode=blend_mode,
        fill_color=fill,
        rotate=rotate,
    )
    if val is False:
        raise RuntimeError("Error updating annotation.")

    if annot_type == mupdf.PDF_ANNOT_FREE_TEXT:
        # in absence of previous opacity, we may need to modify the AP
        ap = self._getAP()
        if 0 <= opacity < 1 and not ap.startswith(b"/H gs"):
            self._setAP(b"/H gs\n" + ap)
        return

    bfill = color_string(fill, "f")
    bstroke = color_string(stroke, "c")

    p_ctm = self.get_parent().transformation_matrix
    imat = ~p_ctm  # inverse page transf. matrix

    if dt:
        dashes = "[" + " ".join(map(str, dt)) + "] 0 d\n"
        dashes = dashes.encode("utf-8")
    else:
        dashes = None

    if self.line_ends:
        line_end_le, line_end_ri = self.line_ends
    else:
        line_end_le, line_end_ri = 0, 0  # init line end codes

    # read contents as created by MuPDF
    ap = self._getAP()
    ap_tab = ap.splitlines()  # split in single lines
    ap_updated = False  # assume we did nothing

    if annot_type == mupdf.PDF_ANNOT_REDACT:
        if cross_out:  # create crossed-out rect
            ap_updated = True
            ap_tab = ap_tab[:-1]
            _, LL, LR, UR, UL = ap_tab
            ap_tab.append(LR)
            ap_tab.append(LL)
            ap_tab.append(UR)
            ap_tab.append(LL)
            ap_tab.append(UL)
            ap_tab.append(b"S")

        if bwidth > 0 or bstroke != b"":
            ap_updated = True
            ntab = [_format_g(bwidth).encode() + b" w"] if bwidth > 0 else []
            for line in ap_tab:
                if line.endswith(b"w"):
                    continue
                if line.endswith(b"RG") and bstroke != b"":
                    line = bstroke[:-1]
                ntab.append(line)
            ap_tab = ntab

        ap = b"\n".join(ap_tab)

    if annot_type in (mupdf.PDF_ANNOT_POLYGON, mupdf.PDF_ANNOT_POLY_LINE):
        ap = b"\n".join(ap_tab[:-1]) + b"\n"
        ap_updated = True
        if bfill != b"":
            if annot_type == mupdf.PDF_ANNOT_POLYGON:
                ap = ap + bfill + b"b"  # close, fill, and stroke
            elif annot_type == mupdf.PDF_ANNOT_POLY_LINE:
                ap = ap + b"S"  # stroke
        else:
            if annot_type == mupdf.PDF_ANNOT_POLYGON:
                ap = ap + b"s"  # close and stroke
            elif annot_type == mupdf.PDF_ANNOT_POLY_LINE:
                ap = ap + b"S"  # stroke

    if dashes is not None:  # handle dashes
        ap = dashes + ap
        # reset dashing - only applies for LINE annots with line ends given
        ap = ap.replace(b"\nS\n", b"\nS\n[] 0 d\n", 1)
        ap_updated = True

    if opa_code:
        ap = opa_code.encode("utf-8") + ap
        ap_updated = True

    ap = b"q\n" + ap + b"\nQ\n"
    #----------------------------------------------------------------------
    # the following handles line end symbols for 'Polygon' and 'Polyline'
    #----------------------------------------------------------------------
    if line_end_le + line_end_ri > 0 and annot_type in (mupdf.PDF_ANNOT_POLYGON, mupdf.PDF_ANNOT_POLY_LINE):
        le_funcs = (None, TOOLS._le_square, TOOLS._le_circle,
                    TOOLS._le_diamond, TOOLS._le_openarrow,
                    TOOLS._le_closedarrow, TOOLS._le_butt,
                    TOOLS._le_ropenarrow, TOOLS._le_rclosedarrow,
                    TOOLS._le_slash)
        le_funcs_range = range(1, len(le_funcs))
        d = 2 * max(1, self.border["width"])
        rect = self.rect + (-d, -d, d, d)
        ap_updated = True
        points = self.vertices
        if line_end_le in le_funcs_range:
            p1 = Point(points[0]) * imat
            p2 = Point(points[1]) * imat
            left = le_funcs[line_end_le](self, p1, p2, False, fill_color)
            ap += left.encode()
        if line_end_ri in le_funcs_range:
            p1 = Point(points[-2]) * imat
            p2 = Point(points[-1]) * imat
            left = le_funcs[line_end_ri](self, p1, p2, True, fill_color)
            ap += left.encode()

    if ap_updated:
        if rect:                        # rect modified here?
            self.set_rect(rect)
            self._setAP(ap, rect=1)
        else:
            self._setAP(ap, rect=0)

    #-------------------------------
    # handle annotation rotations
    #-------------------------------
    if annot_type not in (  # only these types are supported
            mupdf.PDF_ANNOT_CARET,
            mupdf.PDF_ANNOT_CIRCLE,
            mupdf.PDF_ANNOT_FILE_ATTACHMENT,
            mupdf.PDF_ANNOT_INK,
            mupdf.PDF_ANNOT_LINE,
            mupdf.PDF_ANNOT_POLY_LINE,
            mupdf.PDF_ANNOT_POLYGON,
            mupdf.PDF_ANNOT_SQUARE,
            mupdf.PDF_ANNOT_STAMP,
            mupdf.PDF_ANNOT_TEXT,
            ):
        return

    rot = self.rotation  # get value from annot object
    if rot == -1:  # nothing to change
        return

    M = (self.rect.tl + self.rect.br) / 2  # center of annot rect

    if rot == 0:  # undo rotations
        if abs(apnmat - Matrix(1, 1)) < 1e-5:
            return  # matrix already is a no-op
        quad = self.rect.morph(M, ~apnmat)  # derotate rect
        # Use set_rect(), as the other branches do; the former call to
        # 'setRect' named a method this class does not define.
        self.set_rect(quad.rect)
        self.set_apn_matrix(Matrix(1, 1))  # appearance matrix = no-op
        return

    mat = Matrix(rot)
    quad = self.rect.morph(M, mat)
    self.set_rect(quad.rect)
    self.set_apn_matrix(apnmat * mat)  # new appearance matrix
def update_file(self, buffer_=None, filename=None, ufilename=None, desc=None):
    """Update attached file.

    Args:
        buffer_: new file content; when a buffer results, the stream
            and its /DL and /Params/Size entries are updated.
        filename: written to /F and /UF of both the stream and the /FS
            dict, and to /Contents of the annotation.
        ufilename: written to /UF (after `filename`, so it wins).
        desc: written to /Desc.

    Raises:
        TypeError: if the annotation is not a file attachment.
        ValueError: if `buffer_` is truthy but yields no valid buffer.
    """
    CheckParent(self)
    annot = self.this
    annot_obj = mupdf.pdf_annot_obj(annot)
    pdf = mupdf.pdf_get_bound_document(annot_obj)  # the owning PDF
    if mupdf.pdf_annot_type(annot) != mupdf.PDF_ANNOT_FILE_ATTACHMENT:
        raise TypeError( MSG_BAD_ANNOT_TYPE)

    # the object for file content
    stream = mupdf.pdf_dict_getl(annot_obj, PDF_NAME('FS'), PDF_NAME('EF'), PDF_NAME('F'))
    if not stream.m_internal:
        RAISEPY( "bad PDF: no /EF object", JM_Exc_FileDataError)

    fs = mupdf.pdf_dict_get(annot_obj, PDF_NAME('FS'))

    # file content given
    res = JM_BufferFromBytes(buffer_)
    if buffer_ and not res.m_internal:
        raise ValueError( MSG_BAD_BUFFER)
    if res:
        JM_update_stream(pdf, stream, res, 1)
        # adjust /DL and /Size parameters
        size, _ = mupdf.fz_buffer_storage(res)
        size_obj = mupdf.pdf_new_int(size)
        mupdf.pdf_dict_put(stream, PDF_NAME('DL'), size_obj)
        mupdf.pdf_dict_putl(stream, size_obj, PDF_NAME('Params'), PDF_NAME('Size'))

    if filename:
        for key in ('F', 'UF'):
            mupdf.pdf_dict_put_text_string(stream, PDF_NAME(key), filename)
            mupdf.pdf_dict_put_text_string(fs, PDF_NAME(key), filename)
        mupdf.pdf_dict_put_text_string(annot_obj, PDF_NAME('Contents'), filename)

    if ufilename:
        for target in (stream, fs):
            mupdf.pdf_dict_put_text_string(target, PDF_NAME('UF'), ufilename)

    if desc:
        for target in (stream, fs):
            mupdf.pdf_dict_put_text_string(target, PDF_NAME('Desc'), desc)
+@staticmethod
+def update_timing_test():
+total = 0
+for i in range( 30*1000):
+total += i
+return total
@property
def vertices(self):
    """Annotation vertex points.

    Returns:
        A list of (x, y) tuples taken from the first entry found among
        /Vertices, /L, /QuadPoints and /CL; otherwise, for /InkList, a
        list of such lists; None if none of these entries exists. Every
        point is transformed with the page transformation matrix
        concatenated with the page's derotation matrix.
    """
    CheckParent(self)
    annot = self.this
    assert isinstance(annot, mupdf.PdfAnnot)
    annot_obj = mupdf.pdf_annot_obj(annot)
    page = _pdf_annot_page(annot)
    page_ctm = mupdf.FzMatrix()   # page transformation matrix
    dummy = mupdf.FzRect()  # Out-param for mupdf.pdf_page_transform().
    mupdf.pdf_page_transform(page, dummy, page_ctm)
    page_ctm = mupdf.fz_concat(page_ctm, JM_derotate_page_matrix(page))

    def to_points(array):
        # Each pair of floats is one point, transformed separately.
        pts = []
        for k in range(0, mupdf.pdf_array_len(array), 2):
            x = mupdf.pdf_to_real(mupdf.pdf_array_get(array, k))
            y = mupdf.pdf_to_real(mupdf.pdf_array_get(array, k + 1))
            moved = mupdf.fz_transform_point(mupdf.FzPoint(x, y), page_ctm)
            pts.append((moved.x, moved.y))
        return pts

    # Flat coordinate lists: take the first key that is present.
    for key in ('Vertices', 'L', 'QuadPoints'):
        flat = mupdf.pdf_dict_get(annot_obj, PDF_NAME(key))
        if flat.m_internal:
            break
    else:
        flat = mupdf.pdf_dict_gets(annot_obj, 'CL')
    if flat.m_internal:
        return to_points(flat)

    # InkList has 2-level lists.
    ink = mupdf.pdf_dict_gets(annot_obj, 'InkList')
    if ink.m_internal:
        return [
            to_points(mupdf.pdf_array_get(ink, i))
            for i in range(mupdf.pdf_array_len(ink))
        ]
    return None
@property
def xref(self):
    """Return the xref number of the annotation object."""
    CheckParent(self)
    annot_obj = mupdf.pdf_annot_obj(self.this)
    return mupdf.pdf_to_num(annot_obj)
class Archive:
    """A MuPDF multi-archive plus a Python-side record of what was mounted."""

    def __init__( self, *args):
        '''
        Archive(dirname [, path]) - from folder
        Archive(file [, path]) - from file name or object
        Archive(data, name) - from memory item
        Archive() - empty archive
        Archive(archive [, path]) - from archive
        '''
        # One dict (keys 'fmt', 'entries', 'path') per mounted sub-archive.
        self._subarchives = list()
        self.this = mupdf.fz_new_multi_archive()
        if args:
            self.add( *args)

    def __repr__( self):
        return f'Archive, sub-archives: {len(self._subarchives)}'

    def _add_arch( self, subarch, path=None):
        """Mount an existing archive under `path`."""
        mupdf.fz_mount_multi_archive( self.this, subarch, path)

    def _add_dir( self, folder, path=None):
        """Mount a file system folder under `path`."""
        sub = mupdf.fz_open_directory( folder)
        mupdf.fz_mount_multi_archive( self.this, sub, path)

    def _add_treeitem( self, memory, name, path=None):
        """Mount a new tree archive holding `memory` as entry `name`."""
        buff = JM_BufferFromBytes( memory)
        sub = mupdf.fz_new_tree_archive( mupdf.FzTree())
        mupdf.fz_tree_archive_add_buffer( sub, name, buff)
        mupdf.fz_mount_multi_archive( self.this, sub, path)

    def _add_ziptarfile( self, filepath, type_, path=None):
        """Mount a zip (type_ == 1) or tar (otherwise) file under `path`."""
        if type_ == 1:
            sub = mupdf.fz_open_zip_archive( filepath)
        else:
            sub = mupdf.fz_open_tar_archive( filepath)
        mupdf.fz_mount_multi_archive( self.this, sub, path)

    def _add_ziptarmemory( self, memory, type_, path=None):
        """Mount in-memory zip (type_ == 1) or tar (otherwise) data."""
        buff = JM_BufferFromBytes( memory)
        stream = mupdf.fz_open_buffer( buff)
        if type_==1:
            sub = mupdf.fz_open_zip_archive_with_stream( stream)
        else:
            sub = mupdf.fz_open_tar_archive_with_stream( stream)
        mupdf.fz_mount_multi_archive( self.this, sub, path)

    def add( self, content, path=None):
        '''
        Add a sub-archive.

        Args:
            content:
                The content to be added. May be one of:
                    `str` - must be path of directory or file.
                    `bytes`, `bytearray`, `io.BytesIO` - raw data.
                    `zipfile.Zipfile`.
                    `tarfile.TarFile`.
                    `pymupdf.Archive`.
                    A two-item tuple `(data, name)`.
                    List or tuple (but not tuple with length 2) of the above.
            path: (str) a "virtual" path name, under which the elements
                of content can be retrieved. Use it to e.g. cope with
                duplicate element names. For a single file or raw binary
                data, `path` is the (required, non-empty) entry name.

        Raises:
            AssertionError: if a file or raw data is given without a
                non-empty string `path`.
            ValueError: if a `str` content is neither file nor directory.
            TypeError: if `content` has an unsupported type.
        '''
        def is_binary_data(x):
            return isinstance(x, (bytes, bytearray, io.BytesIO))

        def make_subarch(entries, mount, fmt):
            # Record the addition. Consecutive 'tree' items with the same
            # mount path are merged into a single record.
            subarch = dict(fmt=fmt, entries=entries, path=mount)
            if fmt != "tree" or self._subarchives == []:
                self._subarchives.append(subarch)
            else:
                ltree = self._subarchives[-1]
                if ltree["fmt"] != "tree" or ltree["path"] != subarch["path"]:
                    self._subarchives.append(subarch)
                else:
                    ltree["entries"].extend(subarch["entries"])
                    self._subarchives[-1] = ltree

        if isinstance(content, pathlib.Path):
            content = str(content)

        if isinstance(content, str):
            if os.path.isdir(content):
                self._add_dir(content, path)
                return make_subarch(os.listdir(content), path, 'dir')
            elif os.path.isfile(content):
                assert isinstance(path, str) and path != '', \
                        f'Need name for binary content, but {path=}.'
                # Read as bytes: the content is stored as a binary item.
                with open(content, 'rb') as f:
                    ff = f.read()
                self._add_treeitem(ff, path)
                return make_subarch([path], None, 'tree')
            else:
                raise ValueError(f'Not a file or directory: {content!r}')

        elif is_binary_data(content):
            # The comma before the message matters: without it the message
            # is concatenated with '' and compared against `path`.
            assert isinstance(path, str) and path != '', \
                    f'Need name for binary content, but {path=}.'
            self._add_treeitem(content, path)
            return make_subarch([path], None, 'tree')

        elif isinstance(content, zipfile.ZipFile):
            filename = getattr(content, "filename", None)
            if filename is None:
                fp = content.fp.getvalue()
                self._add_ziptarmemory(fp, 1, path)
            else:
                self._add_ziptarfile(filename, 1, path)
            return make_subarch(content.namelist(), path, 'zip')

        elif isinstance(content, tarfile.TarFile):
            filename = getattr(content.fileobj, "name", None)
            if filename is None:
                fp = content.fileobj
                if not isinstance(fp, io.BytesIO):
                    fp = fp.fileobj
                self._add_ziptarmemory(fp.getvalue(), 0, path)
            else:
                self._add_ziptarfile(filename, 0, path)
            return make_subarch(content.getnames(), path, 'tar')

        elif isinstance(content, Archive):
            self._add_arch(content, path)
            return make_subarch([], path, 'multi')

        if isinstance(content, tuple) and len(content) == 2:
            # covers the tree item plus path
            data, name = content
            assert isinstance(name, str), f'Unexpected {type(name)=}'
            if is_binary_data(data):
                self._add_treeitem(data, name, path=path)
            elif isinstance(data, str):
                # NOTE(review): a string that is not an existing file is
                # skipped silently, yet still recorded below.
                if os.path.isfile(data):
                    with open(data, 'rb') as f:
                        ff = f.read()
                    self._add_treeitem(ff, name, path=path)
            else:
                assert 0, f'Unexpected {type(data)=}.'
            return make_subarch([name], path, 'tree')

        elif hasattr(content, '__getitem__'):
            # Deal with sequence of disparate items.
            for item in content:
                self.add(item, path)
            return

        else:
            raise TypeError(f'Unrecognised type {type(content)}.')
        assert 0

    @property
    def entry_list( self):
        '''
        List of sub archives.
        '''
        return self._subarchives

    def has_entry( self, name):
        """Return whether the archive has an entry called `name`."""
        return mupdf.fz_has_archive_entry( self.this, name)

    def read_entry( self, name):
        """Return the content of entry `name` as binary data."""
        buff = mupdf.fz_read_archive_entry( self.this, name)
        return JM_BinFromBuffer( buff)
+class Xml:
+def __enter__(self):
+return self
+def __exit__(self, *args):
+pass
def __init__(self, rhs):
    """Wrap an existing mupdf.FzXml node, or parse the string `rhs` as HTML5."""
    if isinstance(rhs, mupdf.FzXml):
        self.this = rhs
        return
    if isinstance(rhs, str):
        buff = mupdf.fz_new_buffer_from_copied_data(rhs)
        self.this = mupdf.fz_parse_xml_from_html5(buff)
        return
    assert 0, f'Unsupported type for rhs: {type(rhs)}'
+def _get_node_tree( self):
+def show_node(node, items, shift):
+while node is not None:
+if node.is_text:
+items.append((shift, f'"{node.text}"'))
+node = node.next
+continue
+items.append((shift, f"({node.tagname}"))
+for k, v in node.get_attributes().items():
+items.append((shift, f"={k} '{v}'"))
+child = node.first_child
+if child:
+items = show_node(child, items, shift + 1)
+items.append((shift, f"){node.tagname}"))
+node = node.next
+return items
+shift = 0
+items = []
+items = show_node(self, items, shift)
+return items
def add_bullet_list(self):
    """Append a new bulleted list ("ul" tag) and return it."""
    ul_node = self.create_element("ul")
    self.append_child(ul_node)
    return ul_node
def add_class(self, text):
    """Add `text` to the "class" attribute and return self.

    Nothing changes if `text` already occurs (as a substring) in the
    current value; otherwise it is appended after a space.
    """
    current = self.get_attribute_value("class")
    if current is None:
        combined = text
    elif text in current:
        return self
    else:
        combined = current + " " + text
    self.remove_attribute("class")
    self.set_attribute("class", combined)
    return self
def add_code(self, text=None):
    """Add a "code" tag, holding `text` if that is a str, and return self.

    The tag is appended to self.span_bottom(), or to self if that is None.
    """
    node = self.create_element("code")
    if type(text) is str:
        node.append_child(self.create_text_node(text))
    target = self.span_bottom()
    (self if target is None else target).append_child(node)
    return self
def add_codeblock(self):
    """Append a new block of monospaced lines ("pre" node) and return it."""
    pre_node = self.create_element("pre")
    self.append_child(pre_node)
    return pre_node
def add_description_list(self):
    """Append a new description list ("dl" tag) and return it."""
    dl_node = self.create_element("dl")
    self.append_child(dl_node)
    return dl_node
def add_division(self):
    """Append a new "div" tag and return it."""
    div_node = self.create_element("div")
    self.append_child(div_node)
    return div_node
def add_header(self, level=1):
    """Add a header tag "h<level>" and return it.

    If self is itself a header or a paragraph, the new header is appended
    to self.parent, otherwise to self.

    Raises:
        ValueError: if `level` is not in 1..6.
    """
    if level not in range(1, 7):
        raise ValueError("Header level must be in [1, 6]")
    own_tag = self.tagname
    heading = self.create_element(f"h{level}")
    if own_tag in ("h1", "h2", "h3", "h4", "h5", "h6", "p"):
        target = self.parent
    else:
        target = self
    target.append_child(heading)
    return heading
def add_horizontal_line(self):
    """Append a new horizontal line ("hr" tag) and return it."""
    hr_node = self.create_element("hr")
    self.append_child(hr_node)
    return hr_node
def add_image(self, name, width=None, height=None, imgfloat=None, align=None):
    """Add an image node (tag "img") and return it.

    `name` becomes the "src" attribute. `width`, `height` and `align`
    become attributes of the same name, `imgfloat` becomes the style
    "float: <imgfloat>"; each is set only when not None.
    """
    settings = []
    if width is not None:
        settings.append(("width", f"{width}"))
    if height is not None:
        settings.append(("height", f"{height}"))
    if imgfloat is not None:
        settings.append(("style", f"float: {imgfloat}"))
    if align is not None:
        settings.append(("align", f"{align}"))
    settings.append(("src", f"{name}"))

    img = self.create_element("img")
    for key, value in settings:
        img.set_attribute(key, value)
    self.append_child(img)
    return img
def add_link(self, href, text=None):
    """Add a hyperlink ("a" tag) and return self.

    If `text` is not a str, `href` is used as the link text. The link is
    appended to self.span_bottom(), or to self if that is None.
    """
    label = text if isinstance(text, str) else href
    anchor = self.create_element("a")
    anchor.set_attribute("href", href)
    anchor.append_child(self.create_text_node(label))
    target = self.span_bottom()
    (self if target is None else target).append_child(anchor)
    return self
def add_list_item(self):
    """Add item ("li" tag) under a (numbered or bulleted) list.

    Returns:
        The new "li" node.

    Raises:
        ValueError: if self is not an "ol" or "ul" node.
    """
    if self.tagname not in ("ol", "ul"):
        # One formatted message; two positional arguments would make
        # str(exception) render as a tuple.
        raise ValueError(f"cannot add list item to '{self.tagname}'")
    child = self.create_element("li")
    self.append_child(child)
    return child
def add_number_list(self, start=1, numtype=None):
    """Append a new numbered list ("ol" tag) and return it.

    "start" is set only if `start` > 1, "type" only if `numtype` is
    not None.
    """
    ol_node = self.create_element("ol")
    if start > 1:
        ol_node.set_attribute("start", str(start))
    if numtype is not None:
        ol_node.set_attribute("type", numtype)
    self.append_child(ol_node)
    return ol_node
def add_paragraph(self):
    """Add a "p" tag and return it.

    If self is a paragraph, the new one is appended to self.parent,
    otherwise to self.
    """
    para = self.create_element("p")
    target = self.parent if self.tagname == "p" else self
    target.append_child(para)
    return para
def add_span(self):
    """Append a new "span" tag and return it."""
    span_node = self.create_element("span")
    self.append_child(span_node)
    return span_node
def add_style(self, text):
    """Add `text` to the "style" attribute and return self.

    Nothing changes if `text` already occurs (as a substring) in the
    current value; otherwise it is appended after a ";".
    """
    current = self.get_attribute_value("style")
    if current is None:
        combined = text
    elif text in current:
        return self
    else:
        combined = current + ";" + text
    self.remove_attribute("style")
    self.set_attribute("style", combined)
    return self
def add_subscript(self, text=None):
    """Add a subscript ("sub" tag), holding `text` if that is a str; return self.

    The tag is appended to self.span_bottom(), or to self if that is None.
    """
    node = self.create_element("sub")
    if type(text) is str:
        node.append_child(self.create_text_node(text))
    target = self.span_bottom()
    (self if target is None else target).append_child(node)
    return self
def add_superscript(self, text=None):
    """Add a superscript ("sup" tag), holding `text` if that is a str; return self.

    The tag is appended to self.span_bottom(), or to self if that is None.
    """
    node = self.create_element("sup")
    if type(text) is str:
        node.append_child(self.create_text_node(text))
    target = self.span_bottom()
    (self if target is None else target).append_child(node)
    return self
def add_text(self, text):
    """Add text and return self. Line breaks are honored.

    Each line becomes a text node, with a "br" element between
    consecutive lines. Nodes are appended to self.span_bottom(), or to
    self if that is None.
    """
    pieces = text.splitlines()
    final_index = len(pieces) - 1
    target = self.span_bottom()
    if target is None:
        target = self
    for index, piece in enumerate(pieces):
        target.append_child(self.create_text_node(piece))
        if index != final_index:
            target.append_child(self.create_element("br"))
    return self
def append_child( self, child):
    """Append the node `child` to this node."""
    parent_node, child_node = self.this, child.this
    mupdf.fz_dom_append_child(parent_node, child_node)
def append_styled_span(self, style):
    """Append a new "span" carrying CSS `style`.

    The span is appended to the node returned by span_bottom(), or to this
    node when that is None. The node that received the span is returned.
    """
    styled = self.create_element("span")
    styled.add_style(style)
    host = self.span_bottom()
    if host is None:
        host = self
    host.append_child(styled)
    return host
def bodytag(self):
    """Return the result of mupdf.fz_dom_body() for this node, wrapped as Xml."""
    body = mupdf.fz_dom_body(self.this)
    return Xml(body)
def clone(self):
    """Return the result of mupdf.fz_dom_clone() for this node, wrapped as Xml."""
    return Xml(mupdf.fz_dom_clone(self.this))
+@staticmethod
+def color_text(color):
+if type(color) is str:
+return color
+if type(color) is int:
+return f"rgb({sRGB_to_rgb(color)})"
+if type(color) in (tuple, list):
+return f"rgb{tuple(color)}"
+return color
def create_element(self, tag):
    """Return a new element named `tag`, made by mupdf.fz_dom_create_element()."""
    element = mupdf.fz_dom_create_element(self.this, tag)
    return Xml(element)
def create_text_node(self, text):
    """Return a new text node for `text`, made by mupdf.fz_dom_create_text_node()."""
    text_node = mupdf.fz_dom_create_text_node(self.this, text)
    return Xml(text_node)
def debug(self):
    """Print a list of the node tree below self.

    Each entry of _get_node_tree() is written with message(): entry[0]
    sets the indentation (two spaces per unit) and entry[1] is the text,
    with line breaks shown as a literal "\\n".
    """
    for entry in self._get_node_tree():
        indent = "  " * entry[0]
        label = entry[1].replace("\n", "\\n")
        message(indent + label)
def find(self, tag, att, match):
    """Search via mupdf.fz_dom_find(); return an Xml node, or None if nothing is found."""
    hit = mupdf.fz_dom_find(self.this, tag, att, match)
    if not hit.m_internal:
        return None
    return Xml(hit)
def find_next(self, tag, att, match):
    """Search via mupdf.fz_dom_find_next(); return an Xml node, or None if nothing is found."""
    hit = mupdf.fz_dom_find_next(self.this, tag, att, match)
    if not hit.m_internal:
        return None
    return Xml(hit)
@property
def first_child(self):
    """First child node as Xml, or None for text nodes and childless nodes."""
    if mupdf.fz_xml_text(self.this):
        # text node, has no child.
        return None
    # Pass the wrapped MuPDF object like all sibling methods do, instead of
    # the Xml wrapper itself.
    ret = mupdf.fz_dom_first_child(self.this)
    if ret.m_internal:
        return Xml(ret)
    return None
def get_attribute_value(self, key):
    """Return the result of mupdf.fz_dom_attribute() for attribute `key`.

    `key` must be truthy (checked with an assertion).
    """
    assert key
    value = mupdf.fz_dom_attribute(self.this, key)
    return value
def get_attributes(self):
    """Return a dict of this node's attributes, or None for a text node.

    Attributes are read by index with mupdf.fz_dom_get_attribute() until
    a falsy key or value is delivered.
    """
    if mupdf.fz_xml_text(self.this):
        # text node, has no attributes.
        return None
    attributes = {}
    index = 0
    while True:
        val, key = mupdf.fz_dom_get_attribute(self.this, index)
        if not (val and key):
            return attributes
        attributes[key] = val
        index += 1
def insert_after(self, node):
    """Call mupdf.fz_dom_insert_after() with this node and `node`."""
    own_dom, other_dom = self.this, node.this
    mupdf.fz_dom_insert_after(own_dom, other_dom)
def insert_before(self, node):
    """Call mupdf.fz_dom_insert_before() with this node and `node`."""
    own_dom, other_dom = self.this, node.this
    mupdf.fz_dom_insert_before(own_dom, other_dom)
def insert_text(self, text):
    """Append the lines of `text` to this node, separated by "br" elements.

    Unlike add_text(), nodes always go directly to this node. Returns self.
    """
    segments = text.splitlines()
    final = len(segments) - 1
    for position, segment in enumerate(segments):
        self.append_child(self.create_text_node(segment))
        if position < final:  # no line break after the last line
            self.append_child(self.create_element("br"))
    return self
@property
def is_text(self):
    """Check if this is a text node, i.e. property `text` is not None."""
    content = self.text
    return content is not None
@property
def last_child(self):
    """Return last child node, or None if there is no first child."""
    current = self.first_child
    if current is None:
        return None
    # Walk the sibling chain until there is no further sibling.
    successor = current.next
    while successor:
        current = successor
        successor = current.next
    return current
@property
def next(self):
    """Result of mupdf.fz_dom_next() as Xml, or None if there is none."""
    sibling = mupdf.fz_dom_next(self.this)
    return Xml(sibling) if sibling.m_internal else None
@property
def parent(self):
    """Result of mupdf.fz_dom_parent() as Xml, or None if there is none."""
    owner = mupdf.fz_dom_parent(self.this)
    return Xml(owner) if owner.m_internal else None
@property
def previous(self):
    """Result of mupdf.fz_dom_previous() as Xml, or None if there is none."""
    sibling = mupdf.fz_dom_previous(self.this)
    return Xml(sibling) if sibling.m_internal else None
def remove(self):
    """Call mupdf.fz_dom_remove() for this node."""
    dom_node = self.this
    mupdf.fz_dom_remove(dom_node)
def remove_attribute(self, key):
    """Remove attribute `key` via mupdf.fz_dom_remove_attribute().

    `key` must be truthy (checked with an assertion).
    """
    assert key
    dom_node = self.this
    mupdf.fz_dom_remove_attribute(dom_node, key)
@property
def root(self):
    """Result of mupdf.fz_xml_root() for this node, wrapped as Xml."""
    top = mupdf.fz_xml_root(self.this)
    return Xml(top)
def set_align(self, align):
    """Set text alignment via CSS style

    Args:
        align: a str, used verbatim as the CSS value, or one of the
            TEXT_ALIGN_LEFT / _CENTER / _RIGHT / _JUSTIFY constants.

    Raises:
        ValueError: `align` is neither a str nor one of these constants.
    """
    if isinstance(align, str):
        keyword = align
    else:
        choices = (
            (TEXT_ALIGN_LEFT, "left"),
            (TEXT_ALIGN_CENTER, "center"),
            (TEXT_ALIGN_RIGHT, "right"),
            (TEXT_ALIGN_JUSTIFY, "justify"),
        )
        for constant, word in choices:
            if align == constant:
                keyword = word
                break
        else:
            raise ValueError(f"Unrecognised {align=}")
    self.add_style("text-align: %s" % keyword)
    return self
def set_attribute(self, key, value):
    """Set attribute `key` to `value` via mupdf.fz_dom_add_attribute().

    `key` must be truthy (checked with an assertion).
    """
    assert key
    dom_node = self.this
    mupdf.fz_dom_add_attribute(dom_node, key, value)
def set_bgcolor(self, color):
    """Set background color via CSS style

    The declaration is added to this node's own style (add_style), not to
    a nested span - per the original note it does not work on span level.
    Returns self.
    """
    css_color = self.color_text(color)
    self.add_style(f"background-color: {css_color}")
    return self
def set_bold(self, val=True):
    """Set bold on / off via CSS style

    A truthy `val` selects "bold", a falsy one "normal". The declaration
    is applied with append_styled_span(). Returns self.
    """
    weight = "bold" if val else "normal"
    self.append_styled_span(f"font-weight: {weight}")
    return self
def set_color(self, color):
    """Set text color via CSS style

    `color` is converted with color_text() and applied with
    append_styled_span(). Returns self.
    """
    css_color = self.color_text(color)
    self.append_styled_span(f"color: {css_color}")
    return self
def set_columns(self, cols):
    """Set number of text columns via CSS style

    Applied with append_styled_span(). Returns self.
    """
    self.append_styled_span("columns: %s" % (cols,))
    return self
def set_font(self, font):
    """Set font-family name via CSS style

    Applied with append_styled_span(). Returns self.
    """
    declaration = "font-family: %s" % font
    self.append_styled_span(declaration)
    return self
def set_fontsize(self, fontsize):
    """Set font size name via CSS style

    A str value is used verbatim; any other value gets the unit "px"
    appended. Applied with append_styled_span(). Returns self.
    """
    unit = "" if type(fontsize) is str else "px"
    self.append_styled_span(f"font-size: {fontsize}{unit}")
    return self
def set_id(self, unique):
    """Set a unique id.

    Raises:
        ValueError: the root node's find() reports an existing node with
            attribute "id" equal to `unique`.
    """
    # check uniqueness
    clash = self.root.find(None, "id", unique)
    if clash:
        raise ValueError(f"id '{unique}' already exists")
    self.set_attribute("id", unique)
    return self
def set_italic(self, val=True):
    """Set italic on / off via CSS style

    A truthy `val` selects "italic", a falsy one "normal". The declaration
    is applied with append_styled_span(). Returns self.
    """
    slant = "italic" if val else "normal"
    self.append_styled_span(f"font-style: {slant}")
    return self
def set_leading(self, leading):
    """Set inter-line spacing value via CSS style - block-level only.

    Uses property "-mupdf-leading", added with add_style(). Returns self.
    """
    self.add_style("-mupdf-leading: %s" % (leading,))
    return self
def set_letter_spacing(self, spacing):
    """Set inter-letter spacing value via CSS style

    Applied with append_styled_span(). Returns self.
    """
    self.append_styled_span("letter-spacing: %s" % (spacing,))
    return self
def set_lineheight(self, lineheight):
    """Set line height name via CSS style - block-level only.

    Added with add_style(). Returns self.
    """
    self.add_style("line-height: %s" % (lineheight,))
    return self
def set_margins(self, val):
    """Set margin values via CSS style

    Applied with append_styled_span(). Returns self.
    """
    declaration = "margins: %s" % val
    self.append_styled_span(declaration)
    return self
def set_opacity(self, opacity):
    """Set opacity via CSS style

    Applied with append_styled_span(). Returns self.
    """
    self.append_styled_span("opacity: %s" % (opacity,))
    return self
def set_pagebreak_after(self):
    """Insert a page break after this node.

    Adds "page-break-after: always" with add_style(). Returns self.
    """
    self.add_style("page-break-after: always")
    return self
def set_pagebreak_before(self):
    """Insert a page break before this node.

    Adds "page-break-before: always" with add_style(). Returns self.
    """
    self.add_style("page-break-before: always")
    return self
def set_properties(
    self,
    align=None,
    bgcolor=None,
    bold=None,
    color=None,
    columns=None,
    font=None,
    fontsize=None,
    indent=None,
    italic=None,
    leading=None,
    letter_spacing=None,
    lineheight=None,
    margins=None,
    pagebreak_after=None,
    pagebreak_before=None,
    word_spacing=None,
    unqid=None,
    cls=None,
):
    """Set any or all properties of a node.

    To be used for existing nodes preferably.

    The requested style setters are run on a scratch division added to the
    root node. The styles found on the scratch node and along its chain of
    first children are then joined with ";" into this node's "style"
    attribute, and the scratch node is removed again. `unqid` and `cls`
    are applied to this node directly via set_id() / add_class().
    Returns self.
    """
    scratch = self.root.add_division()

    # (setter name suffix, value) in the order the setters must be invoked.
    with_value = (
        ("align", align),
        ("bgcolor", bgcolor),
        ("bold", bold),
        ("color", color),
        ("columns", columns),
        ("font", font),
        ("fontsize", fontsize),
        ("text_indent", indent),
        ("italic", italic),
        ("leading", leading),
        ("letter_spacing", letter_spacing),
        ("lineheight", lineheight),
        ("margins", margins),
    )
    for suffix, value in with_value:
        if value is not None:
            getattr(scratch, "set_" + suffix)(value)
    # The page break setters take no argument.
    if pagebreak_after is not None:
        scratch.set_pagebreak_after()
    if pagebreak_before is not None:
        scratch.set_pagebreak_before()
    if word_spacing is not None:
        scratch.set_word_spacing(word_spacing)

    if unqid is not None:
        self.set_id(unqid)
    if cls is not None:
        self.add_class(cls)

    collected = []
    own_style = scratch.get_attribute_value("style")
    if own_style is not None:
        collected.append(own_style)
    node = scratch.first_child
    while node:
        collected.append(node.get_attribute_value("style"))
        node = node.first_child
    self.set_attribute("style", ";".join(collected))
    scratch.remove()
    return self
def set_text_indent(self, indent):
    """Set text indentation name via CSS style - block-level only.

    Added with add_style(). Returns self.
    """
    self.add_style("text-indent: %s" % (indent,))
    return self
def set_underline(self, val="underline"):
    """Set CSS "text-decoration" to `val` via append_styled_span(). Returns self."""
    declaration = "text-decoration: %s" % val
    self.append_styled_span(declaration)
    return self
def set_word_spacing(self, spacing):
    """Set inter-word spacing value via CSS style

    Applied with append_styled_span(). Returns self.
    """
    self.append_styled_span("word-spacing: %s" % (spacing,))
    return self
def span_bottom(self):
    """Find deepest level in stacked spans.

    Returns:
        None if this node has no children, or if its last non-text child
        is missing or is not a "span". Otherwise the innermost "span"
        reached by descending from that child as described below.
    """
    parent = self
    child = self.last_child
    if child is None:
        return None
    # Skip backwards over trailing text nodes to the last element child.
    while child.is_text:
        child = child.previous
        if child is None:
            break
    if child is None or child.tagname != "span":
        return None

    # Descend: a "span" becomes the new result candidate and the search
    # continues with its first child.
    while True:
        if child is None:
            return parent
        # Text nodes and these tags are stepped over sideways.
        if child.tagname in ("a", "sub","sup","body") or child.is_text:
            child = child.next
            continue
        if child.tagname == "span":
            parent = child
            child = child.first_child
        else:
            # Any other element ends the descent.
            return parent
@property
def tagname(self):
    """Result of mupdf.fz_xml_tag() for this node."""
    tag = mupdf.fz_xml_tag(self.this)
    return tag
@property
def text(self):
    """Result of mupdf.fz_xml_text() for this node."""
    content = mupdf.fz_xml_text(self.this)
    return content
# Aliases: add_var, add_samp and add_kbd are bound to the same function
# object as add_code.
add_var = add_code
add_samp = add_code
add_kbd = add_code
class Colorspace:
    """Wrapper around a mupdf.FzColorspace, stored in attribute `this`."""

    def __init__(self, type_):
        """Supported are GRAY, RGB and CMYK.

        Args:
            type_: an existing mupdf.FzColorspace (stored as is), or one
                of CS_GRAY, CS_CMYK, CS_RGB. Any other value selects RGB.
        """
        if isinstance( type_, mupdf.FzColorspace):
            self.this = type_
        elif type_ == CS_GRAY:
            self.this = mupdf.FzColorspace(mupdf.FzColorspace.Fixed_GRAY)
        elif type_ == CS_CMYK:
            self.this = mupdf.FzColorspace(mupdf.FzColorspace.Fixed_CMYK)
        else:
            # CS_RGB and every unrecognised value end up as RGB.
            self.this = mupdf.FzColorspace(mupdf.FzColorspace.Fixed_RGB)

    def __repr__(self):
        # Map the component count to the CS_* suffix. A wrapped colorspace
        # is not guaranteed to report 1, 3 or 4 components; indexing a
        # fixed 5-tuple raised IndexError for larger counts.
        x = {1: "GRAY", 3: "RGB", 4: "CMYK"}.get(self.n, "")
        return "Colorspace(CS_%s) - %s" % (x, self.name)

    def _name(self):
        """Return the result of mupdf.fz_colorspace_name()."""
        return mupdf.fz_colorspace_name(self.this)

    @property
    def n(self):
        """Size of one pixel."""
        return mupdf.fz_colorspace_n(self.this)

    @property
    def name(self):
        """Name of the Colorspace."""
        return self._name()
class DeviceWrapper:
    """Holds a MuPDF device in attribute `this`.

    The device is either passed in directly, or created from one of the
    supported argument patterns (see __init__).
    """

    def __init__(self, *args):
        """Accepted argument patterns, as checked with args_match():

        (mupdf.FzDevice,): store the given device.
        (Pixmap, clip): new draw device for the pixmap; restricted to
            `clip` unless that converts to an infinite irect.
        (mupdf.FzDisplayList,): new list device.
        (mupdf.FzStextPage, flags): new stext device with these flags.

        Raises:
            Exception: none of the patterns matched.
        """
        if args_match( args, mupdf.FzDevice):
            (self.this,) = args
            return
        if args_match( args, Pixmap, None):
            pixmap, clip = args
            clip_box = JM_irect_from_py( clip)
            if mupdf.fz_is_infinite_irect( clip_box):
                self.this = mupdf.fz_new_draw_device( mupdf.FzMatrix(), pixmap)
            else:
                self.this = mupdf.fz_new_draw_device_with_bbox( mupdf.FzMatrix(), pixmap, clip_box)
            return
        if args_match( args, mupdf.FzDisplayList):
            (display_list,) = args
            self.this = mupdf.fz_new_list_device( display_list)
            return
        if args_match( args, mupdf.FzStextPage, None):
            textpage, flags = args
            options = mupdf.FzStextOptions( flags)
            self.this = mupdf.fz_new_stext_device( textpage, options)
            return
        raise Exception( f'Unrecognised args for DeviceWrapper: {args!r}')
class DisplayList:
    """Wrapper around a mupdf.FzDisplayList, stored in attribute `this`."""

    def __del__(self):
        # Only act for exact DisplayList instances, not for subclasses.
        if not type(self) is DisplayList: return
        self.thisown = False

    def __init__(self, *args):
        """Create from a mupdf.FzRect (new display list) or wrap an
        existing mupdf.FzDisplayList. Any other arguments fail an assertion.
        """
        if len(args) == 1 and isinstance(args[0], mupdf.FzRect):
            self.this = mupdf.FzDisplayList(args[0])
        elif len(args) == 1 and isinstance(args[0], mupdf.FzDisplayList):
            self.this = args[0]
        else:
            assert 0, f'Unrecognised {args=}'

    def get_pixmap(self, matrix=None, colorspace=None, alpha=0, clip=None):
        """Return the result of JM_pixmap_from_display_list() for this list.

        A `colorspace` that is not a Colorspace instance (including None)
        selects RGB.
        """
        if isinstance(colorspace, Colorspace):
            colorspace = colorspace.this
        else:
            colorspace = mupdf.FzColorspace(mupdf.FzColorspace.Fixed_RGB)
        val = JM_pixmap_from_display_list(self.this, matrix, colorspace, alpha, clip, None)
        val.thisown = True
        return val

    def get_textpage(self, flags=3):
        """Make a TextPage from a DisplayList."""
        stext_options = mupdf.FzStextOptions()
        stext_options.flags = flags
        val = mupdf.FzStextPage(self.this, stext_options)
        val.thisown = True
        return val

    @property
    def rect(self):
        """Result of mupdf.fz_bound_display_list() as a Rect."""
        val = JM_py_from_rect(mupdf.fz_bound_display_list(self.this))
        val = Rect(val)
        return val

    def run(self, dw, m, area):
        """Run this display list through a device.

        Args:
            dw: device holder. An attribute `device` is used if present;
                otherwise attribute `this` (which is where DeviceWrapper
                keeps its device); otherwise `dw` itself.
            m: matrix, converted with JM_matrix_from_py().
            area: rectangle, converted with JM_rect_from_py().
        """
        # DeviceWrapper only defines `this`, so insisting on `dw.device`
        # raised AttributeError for DeviceWrapper instances.
        device = getattr(dw, "device", None)
        if device is None:
            device = getattr(dw, "this", dw)
        mupdf.fz_run_display_list(
            self.this,
            device,
            JM_matrix_from_py(m),
            JM_rect_from_py(area),
            mupdf.FzCookie(),
        )
# When the `extra` module is in use, bind its FzDocument_insert_pdf under a
# module-level name.
if g_use_extra:
    extra_FzDocument_insert_pdf = extra.FzDocument_insert_pdf
+class Document:
+def __contains__(self, loc) -> bool:
+if type(loc) is int:
+if loc < self.page_count:
+return True
+return False
+if type(loc) not in (tuple, list) or len(loc) != 2:
+return False
+chapter, pno = loc
+if (0
+or not isinstance(chapter, int)
+or chapter < 0
+or chapter >= self.chapter_count
+):
+return False
+if (0
+or not isinstance(pno, int)
+or pno < 0
+or pno >= self.chapter_page_count(chapter)
+):
+return False
+return True
+def __delitem__(self, i)->None:
+if not self.is_pdf:
+raise ValueError("is no PDF")
+if type(i) is int:
+return self.delete_page(i)
+if type(i) in (list, tuple, range):
+return self.delete_pages(i)
+if type(i) is not slice:
+raise ValueError("bad argument type")
+pc = self.page_count
+start = i.start if i.start else 0
+stop = i.stop if i.stop else pc
+step = i.step if i.step else 1
+while start < 0:
+start += pc
+if start >= pc:
+raise ValueError("bad page number(s)")
+while stop < 0:
+stop += pc
+if stop > pc:
+raise ValueError("bad page number(s)")
+return self.delete_pages(range(start, stop, step))
+def __enter__(self):
+return self
+def __exit__(self, *args):
+self.close()
@typing.overload
def __getitem__(self, i: int = 0) -> Page:
    ...

if sys.version_info >= (3, 9):
    @typing.overload
    def __getitem__(self, i: slice) -> list[Page]:
        ...

    @typing.overload
    def __getitem__(self, i: tuple[int, int]) -> Page:
        ...

def __getitem__(self, i=0):
    """Load page(s) with `doc[i]`.

    Args:
        i: an int page number, a (chapter, pno) tuple of two ints, or a
            slice - which delivers a list of pages.

    Raises:
        IndexError: `i` is not contained in the document.
    """
    if isinstance(i, slice):
        first, last, stride = i.indices(len(self))
        return [self[pno] for pno in range(first, last, stride)]
    is_page_number = isinstance(i, int)
    is_location = (
        isinstance(i, tuple)
        and len(i) == 2
        and all(isinstance(x, int) for x in i)
    )
    assert is_page_number or is_location, \
        f'Invalid item number: {i=}.'
    if i not in self:
        raise IndexError(f"page {i} not in document")
    return self.load_page(i)
def __init__(self, filename=None, stream=None, filetype=None, rect=None, width=0, height=0, fontsize=11):
    """Creates a document. Use 'open' as a synonym.

    Notes:
        Basic usages:
        open() - new PDF document
        open(filename) - string or pathlib.Path, must have supported
                file extension.
        open(type, buffer) - type: valid extension, buffer: bytes object.
        open(stream=buffer, filetype=type) - keyword version of previous.
        open(filename, filetype=type) - filename with unrecognized extension.
        rect, width, height, fontsize: layout reflowable document
        on open (e.g. EPUB). Ignored if n/a.

    Raises:
        TypeError: unsupported type of `stream` or `filename`.
        EmptyFileError: the stream or the file is empty.
        FileNotFoundError: `filename` does not exist.
        FileDataError: `filename` is no file, or the document cannot be
            opened.
    """
    # We temporarily set JM_mupdf_show_errors=0 while we are constructing,
    # then restore its original value in a `finally:` block.
    #
    global JM_mupdf_show_errors
    JM_mupdf_show_errors_old = JM_mupdf_show_errors
    JM_mupdf_show_errors = 0
    try:
        self.is_closed    = False
        self.is_encrypted = False
        # NOTE(review): the next line repeats the previous assignment.
        self.is_encrypted = False
        self.metadata    = None
        self.FontInfos   = []
        self.Graftmaps   = {}
        self.ShownPages  = {}
        self.InsertedImages  = {}
        self._page_refs  = weakref.WeakValueDictionary()
        if isinstance(filename, mupdf.PdfDocument):
            # Wrap an already existing PDF document object; none of the
            # remaining initialisation below is executed in this case.
            pdf_document = filename
            self.this = pdf_document
            self.this_is_pdf = True
            return

        # Layout size: a non-infinite `rect` overrides width / height.
        w = width
        h = height
        r = JM_rect_from_py(rect)
        if not mupdf.fz_is_infinite_rect(r):
            w = r.x1 - r.x0
            h = r.y1 - r.y0

        self._name = filename
        self.stream = stream

        if stream is not None:
            # Open from memory.
            if filename is not None and filetype is None:
                # 2025-05-06: Use <filename> as the filetype. This is
                # reversing precedence - we used to use <filename> if both
                # were set.
                filetype = filename
            if isinstance(stream, (bytes, memoryview)):
                pass
            elif isinstance(stream, bytearray):
                stream = bytes(stream)
            elif isinstance(stream, io.BytesIO):
                stream = stream.getvalue()
            else:
                raise TypeError(f"bad stream: {type(stream)=}.")
            self.stream = stream

            assert isinstance(stream, (bytes, memoryview))
            if len(stream) == 0:
                # MuPDF raise an exception for this but also generates
                # warnings, which is not very helpful for us. So instead we
                # raise a specific exception.
                raise EmptyFileError('Cannot open empty stream.')

            stream2 = mupdf.fz_open_memory(mupdf.python_buffer_data(stream), len(stream))
            try:
                doc = mupdf.fz_open_document_with_stream(filetype if filetype else '', stream2)
            except Exception as e:
                if g_exceptions_verbose > 1:    exception_info()
                raise FileDataError('Failed to open stream') from e

        elif filename:
            # Open from a file. Accept str, objects with an `absolute`
            # attribute (converted with str()) and objects with a `name`
            # attribute.
            assert not stream
            if isinstance(filename, str):
                pass
            elif hasattr(filename, "absolute"):
                filename = str(filename)
            elif hasattr(filename, "name"):
                filename = filename.name
            else:
                raise TypeError(f"bad filename: {type(filename)=} {filename=}.")
            self._name = filename

            # Generate our own specific exceptions. This avoids MuPDF
            # generating warnings etc.
            if not os.path.exists(filename):
                raise FileNotFoundError(f"no such file: '{filename}'")
            elif not os.path.isfile(filename):
                raise FileDataError(f"'{filename}' is no file")
            elif os.path.getsize(filename) == 0:
                raise EmptyFileError(f'Cannot open empty file: {filename=}.')

            if filetype:
                # Override the type implied by <filename>. MuPDF does not
                # have a way to do this directly so we open via a stream.
                try:
                    fz_stream = mupdf.fz_open_file(filename)
                    doc = mupdf.fz_open_document_with_stream(filetype, fz_stream)
                except Exception as e:
                    if g_exceptions_verbose > 1:    exception_info()
                    raise FileDataError(f'Failed to open file {filename!r} as type {filetype!r}.') from e
            else:
                try:
                    doc = mupdf.fz_open_document(filename)
                except Exception as e:
                    if g_exceptions_verbose > 1:    exception_info()
                    raise FileDataError(f'Failed to open file {filename!r}.') from e

        else:
            # Neither stream nor filename: create a new PDF.
            pdf = mupdf.PdfDocument()
            doc = mupdf.FzDocument(pdf)

        # Explicit layout size if given, else a 400 x 600 / 11 default for
        # reflowable documents.
        if w > 0 and h > 0:
            mupdf.fz_layout_document(doc, w, h, fontsize)
        elif mupdf.fz_is_document_reflowable(doc):
            mupdf.fz_layout_document(doc, 400, 600, 11)

        self.this = doc

        # fixme: not sure where self.thisown gets initialised in PyMuPDF.
        #
        self.thisown = True

        if self.thisown:
            self._graft_id = TOOLS.gen_id()
            if self.needs_pass:
                self.is_encrypted = True
            else: # we won't init until doc is decrypted
                self.init_doc()
            # the following hack detects invalid/empty SVG files, which else may lead
            # to interpreter crashes
            if filename and filename.lower().endswith("svg") or filetype and "svg" in filetype.lower():
                try:
                    _ = self.convert_to_pdf()  # this seems to always work
                except Exception as e:
                    if g_exceptions_verbose > 1:    exception_info()
                    raise FileDataError("cannot open broken document") from e

        if g_use_extra:
            # Select the page count function matching the document type.
            self.this_is_pdf = isinstance( self.this, mupdf.PdfDocument)
            if self.this_is_pdf:
                self.page_count2 = extra.page_count_pdf
            else:
                self.page_count2 = extra.page_count_fz
    finally:
        JM_mupdf_show_errors = JM_mupdf_show_errors_old
+def __len__(self) -> int:
+return self.page_count
+def __repr__(self) -> str:
+m = "closed " if self.is_closed else ""
+if self.stream is None:
+if self.name == "":
+return m + "Document(<new PDF, doc# %i>)" % self._graft_id
+return m + "Document('%s')" % (self.name,)
+return m + "Document('%s', <memory, doc# %i>)" % (self.name, self._graft_id)
def _addFormFont(self, name, font):
    """Add new form font.

    Stores an entry `name` in the dictionary found at trailer
    /Root/AcroForm/DR/Font; its value is made from `font` with
    JM_pdf_obj_from_str(). Returns None without action when
    _as_pdf_document() delivers no PDF.

    Raises:
        ValueError: document closed or encrypted.
        RuntimeError: the form font dictionary does not exist.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self, required=0)
    if not pdf.m_internal:
        return
    form_fonts = mupdf.pdf_dict_getl(
        mupdf.pdf_trailer( pdf),
        PDF_NAME('Root'),
        PDF_NAME('AcroForm'),
        PDF_NAME('DR'),
        PDF_NAME('Font'),
    )
    if not (form_fonts.m_internal and mupdf.pdf_is_dict( form_fonts)):
        raise RuntimeError( "PDF has no form fonts yet")
    mupdf.pdf_dict_put(
        form_fonts,
        mupdf.pdf_new_name( name),
        JM_pdf_obj_from_str( pdf, font),
    )
def _delToC(self):
    """Delete the TOC.

    Returns:
        List of xrefs of the deleted objects: the outline items collected
        by JM_outline_xrefs() followed by the outline root. The list is
        empty for non-PDF documents or when there is no /Outlines entry.

    Raises:
        ValueError: document closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    deleted = []
    pdf = _as_pdf_document(self, required=0)
    if not pdf.m_internal:
        return deleted  # not a pdf
    catalog = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
    outlines = mupdf.pdf_dict_get(catalog, PDF_NAME('Outlines'))
    if not outlines.m_internal:
        return deleted  # no outlines or some problem

    first_item = mupdf.pdf_dict_get(outlines, PDF_NAME('First'))
    deleted = JM_outline_xrefs(first_item, deleted)
    item_count = len(deleted)

    # Delete the outline root object and its catalog entry first ...
    outlines_xref = mupdf.pdf_to_num(outlines)
    mupdf.pdf_delete_object(pdf, outlines_xref)
    mupdf.pdf_dict_del(catalog, PDF_NAME('Outlines'))

    # ... then every outline item.
    for position in range(item_count):
        _, item_xref = JM_INT_ITEM(deleted, position)
        mupdf.pdf_delete_object(pdf, item_xref)
    deleted.append(outlines_xref)
    self.init_doc()
    return deleted
def _delete_page(self, pno):
    """Delete page `pno` with mupdf.pdf_delete_page().

    Afterwards the page tree is dropped (ll_pdf_drop_page_tree) if the
    document has a `rev_page_map`.
    """
    pdf = _as_pdf_document(self)
    mupdf.pdf_delete_page( pdf, pno)
    has_reverse_map = pdf.m_internal.rev_page_map
    if has_reverse_map:
        mupdf.ll_pdf_drop_page_tree( pdf.m_internal)
def _deleteObject(self, xref):
    """Delete object.

    Raises:
        ValueError: _INRANGE() rejects `xref` for the bounds 1 and
            pdf_xref_len() - 1.
    """
    pdf = _as_pdf_document(self)
    highest = mupdf.pdf_xref_len(pdf) - 1
    if not _INRANGE(xref, 1, highest):
        raise ValueError( MSG_BAD_XREF)
    mupdf.pdf_delete_object(pdf, xref)
def _embeddedFileGet(self, idx):
    """Return the content of embedded file number `idx`.

    The entry is element 2*idx+1 of array /Root/Names/EmbeddedFiles/Names;
    the stream at its /EF/F is loaded and converted with JM_BinFromBuffer().
    """
    pdf = _as_pdf_document(self)
    name_array = mupdf.pdf_dict_getl(
        mupdf.pdf_trailer(pdf),
        PDF_NAME('Root'),
        PDF_NAME('Names'),
        PDF_NAME('EmbeddedFiles'),
        PDF_NAME('Names'),
    )
    # The array holds (name, entry) pairs: entries sit at odd positions.
    file_entry = mupdf.pdf_array_get(name_array, 2*idx+1)
    stream_obj = mupdf.pdf_dict_getl(file_entry, PDF_NAME('EF'), PDF_NAME('F'))
    return JM_BinFromBuffer(mupdf.pdf_load_stream(stream_obj))
+def _embeddedFileIndex(self, item: typing.Union[int, str]) -> int:
+filenames = self.embfile_names()
+msg = "'%s' not in EmbeddedFiles array." % str(item)
+if item in filenames:
+idx = filenames.index(item)
+elif item in range(len(filenames)):
+idx = item
+else:
+raise ValueError(msg)
+return idx
def _embfile_add(self, name, buffer_, filename=None, ufilename=None, desc=None):
    """Embed a new file and return the xref of its stream (/EF/F).

    Args:
        name: entry name, stored as a text string in the names array.
        buffer_: file content, converted with JM_BufferFromBytes().
        filename, ufilename, desc: passed on to JM_embed_file().

    Raises:
        TypeError: `buffer_` could not be converted to a buffer.
    """
    pdf = _as_pdf_document(self)
    content = JM_BufferFromBytes(buffer_)
    if not content.m_internal:
        raise TypeError( MSG_BAD_BUFFER)

    name_array = mupdf.pdf_dict_getl(
        mupdf.pdf_trailer(pdf),
        PDF_NAME('Root'),
        PDF_NAME('Names'),
        PDF_NAME('EmbeddedFiles'),
        PDF_NAME('Names'),
    )
    if not mupdf.pdf_is_array(name_array):
        # No names array yet: create /Root/Names/EmbeddedFiles/Names.
        catalog = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
        name_array = mupdf.pdf_new_array(pdf, 6)    # an even number!
        mupdf.pdf_dict_putl(
            catalog,
            name_array,
            PDF_NAME('Names'),
            PDF_NAME('EmbeddedFiles'),
            PDF_NAME('Names'),
        )

    file_entry = JM_embed_file(pdf, content, filename, ufilename, desc, 1)
    stream_obj = mupdf.pdf_dict_getl(file_entry, PDF_NAME('EF'), PDF_NAME('F'))
    xref = mupdf.pdf_to_num(stream_obj)
    # Entries are stored as (name, entry) pairs.
    mupdf.pdf_array_push(name_array, mupdf.pdf_new_text_string(name))
    mupdf.pdf_array_push(name_array, file_entry)
    return xref
def _embfile_del(self, idx):
    """Delete array elements idx + 1 and idx (in this order) from
    /Root/Names/EmbeddedFiles/Names.
    """
    pdf = _as_pdf_document(self)
    name_array = mupdf.pdf_dict_getl(
        mupdf.pdf_trailer(pdf),
        PDF_NAME('Root'),
        PDF_NAME('Names'),
        PDF_NAME('EmbeddedFiles'),
        PDF_NAME('Names'),
    )
    # Higher position first, so the lower one is still valid afterwards.
    for position in (idx + 1, idx):
        mupdf.pdf_array_delete(name_array, position)
def _embfile_info(self, idx, infodict):
    """Fill `infodict` with information on embedded file number `idx`.

    Keys set: "collection" (xref of /CI, or 0), file name, unicode file
    name, description, size (/DL, else /Params/Size, else -1) and length
    (/Length, else -1).

    Returns:
        xref of the file's stream object (/EF/F).
    """
    pdf = _as_pdf_document(self)
    name_array = mupdf.pdf_dict_getl(
        mupdf.pdf_trailer(pdf),
        PDF_NAME('Root'),
        PDF_NAME('Names'),
        PDF_NAME('EmbeddedFiles'),
        PDF_NAME('Names'),
    )
    file_entry = mupdf.pdf_array_get(name_array, 2*idx+1)

    collection = mupdf.pdf_dict_get(file_entry, PDF_NAME('CI'))
    infodict["collection"] = mupdf.pdf_to_num(collection) if collection.m_internal else 0

    def entry_text(key):
        # Text string stored under `key` of the file entry.
        return mupdf.pdf_to_text_string(mupdf.pdf_dict_get(file_entry, PDF_NAME(key)))

    infodict[dictkey_filename] = JM_EscapeStrFromStr(entry_text('F'))
    infodict[dictkey_ufilename] = JM_EscapeStrFromStr(entry_text('UF'))
    infodict[dictkey_descr] = JM_UnicodeFromStr(entry_text('Desc'))

    stream_obj = mupdf.pdf_dict_getl(file_entry, PDF_NAME('EF'), PDF_NAME('F'))
    xref = mupdf.pdf_to_num(stream_obj)

    length = -1
    length_obj = mupdf.pdf_dict_get(stream_obj, PDF_NAME('Length'))
    if length_obj.m_internal:
        length = mupdf.pdf_to_int(length_obj)

    size = -1
    size_obj = mupdf.pdf_dict_get(stream_obj, PDF_NAME('DL'))
    if not size_obj.m_internal:
        size_obj = mupdf.pdf_dict_getl(stream_obj, PDF_NAME('Params'), PDF_NAME('Size'))
    if size_obj.m_internal:
        size = mupdf.pdf_to_int(size_obj)

    infodict[dictkey_size] = size
    infodict[dictkey_length] = length
    return xref
def _embfile_names(self, namelist):
    """Get list of embedded file names.

    The names - every second element of /Root/Names/EmbeddedFiles/Names,
    starting with element 0 - are appended to `namelist`. Nothing is
    appended if that array does not exist.
    """
    pdf = _as_pdf_document(self)
    name_array = mupdf.pdf_dict_getl(
        mupdf.pdf_trailer(pdf),
        PDF_NAME('Root'),
        PDF_NAME('Names'),
        PDF_NAME('EmbeddedFiles'),
        PDF_NAME('Names'),
    )
    if not mupdf.pdf_is_array(name_array):
        return
    for position in range(0, mupdf.pdf_array_len(name_array), 2):
        raw_name = mupdf.pdf_to_text_string(mupdf.pdf_array_get(name_array, position))
        namelist.append(JM_EscapeStrFromStr(raw_name))
def _embfile_upd(self, idx, buffer_=None, filename=None, ufilename=None, desc=None):
    """Update embedded file number `idx`.

    Args:
        idx: index of the file; its entry is array element 2*idx+1 of
            /Root/Names/EmbeddedFiles/Names.
        buffer_: new file content, converted with JM_BufferFromBytes().
            The content is left unchanged if this is None or empty.
        filename, ufilename, desc: new values for /F, /UF and /Desc of
            the entry. Falsy values leave the respective item unchanged.

    Returns:
        xref of the file's stream object (/EF/F).

    Raises:
        TypeError: `buffer_` could not be converted to a buffer.
    """
    pdf = _as_pdf_document(self)
    names = mupdf.pdf_dict_getl(
        mupdf.pdf_trailer(pdf),
        PDF_NAME('Root'),
        PDF_NAME('Names'),
        PDF_NAME('EmbeddedFiles'),
        PDF_NAME('Names'),
    )
    entry = mupdf.pdf_array_get(names, 2*idx+1)

    filespec = mupdf.pdf_dict_getl(entry, PDF_NAME('EF'), PDF_NAME('F'))
    if not filespec.m_internal:
        RAISEPY( "bad PDF: no /EF object", JM_Exc_FileDataError)

    # `buffer_` is the caller's Python object and has no `m_internal`;
    # only the converted MuPDF buffer can be checked that way.
    if buffer_:
        res = JM_BufferFromBytes(buffer_)
        if not res.m_internal:
            raise TypeError( MSG_BAD_BUFFER)
        JM_update_stream(pdf, filespec, res, 1)
        # adjust /DL and /Size parameters
        size, _ = mupdf.fz_buffer_storage(res)
        size_obj = mupdf.pdf_new_int(size)
        mupdf.pdf_dict_put(filespec, PDF_NAME('DL'), size_obj)
        mupdf.pdf_dict_putl(filespec, size_obj, PDF_NAME('Params'), PDF_NAME('Size'))

    xref = mupdf.pdf_to_num(filespec)
    if filename:
        mupdf.pdf_dict_put_text_string(entry, PDF_NAME('F'), filename)
    if ufilename:
        mupdf.pdf_dict_put_text_string(entry, PDF_NAME('UF'), ufilename)
    if desc:
        mupdf.pdf_dict_put_text_string(entry, PDF_NAME('Desc'), desc)
    return xref
def _extend_toc_items(self, items):
    """Add color info to all items of an extended TOC list.

    Each element of `items` must carry a dict at index 3. That dict is
    updated in place with the item's xref, bold / italic, collapse, color
    and zoom information read from the corresponding outline object.

    Raises:
        ValueError: document closed, or an item has no dict at index 3.
        IndexError: number of outline xrefs differs from len(items).
    """
    if self.is_closed:
        raise ValueError("document closed")
    if g_use_extra:
        # Delegate to the `extra` module's implementation.
        return extra.Document_extend_toc_items( self.this, items)
    pdf = _as_pdf_document(self)
    # Keys used in the item dictionaries.
    zoom = "zoom"
    bold = "bold"
    italic = "italic"
    collapse = "collapse"

    root = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
    if not root.m_internal:
        return
    olroot = mupdf.pdf_dict_get(root, PDF_NAME('Outlines'))
    if not olroot.m_internal:
        return
    first = mupdf.pdf_dict_get(olroot, PDF_NAME('First'))
    if not first.m_internal:
        return
    xrefs = []
    xrefs = JM_outline_xrefs(first, xrefs)
    n = len(xrefs)
    m = len(items)
    if not n:
        return
    if n != m:
        raise IndexError( "internal error finding outline xrefs")

    # update all TOC item dictionaries
    for i in range(n):
        xref = int(xrefs[i])
        item = items[i]
        itemdict = item[3]
        if not isinstance(itemdict, dict):
            raise ValueError( "need non-simple TOC format")
        itemdict[dictkey_xref] = xrefs[i]
        bm = mupdf.pdf_load_object(pdf, xref)
        # /F: 1 sets italic, 2 sets bold, 3 sets both.
        flags = mupdf.pdf_to_int( mupdf.pdf_dict_get(bm, PDF_NAME('F')))
        if flags == 1:
            itemdict[italic] = True
        elif flags == 2:
            itemdict[bold] = True
        elif flags == 3:
            itemdict[italic] = True
            itemdict[bold] = True
        # /Count: negative sets collapse to True, positive to False; zero
        # leaves the key unset.
        count = mupdf.pdf_to_int( mupdf.pdf_dict_get(bm, PDF_NAME('Count')))
        if count < 0:
            itemdict[collapse] = True
        elif count > 0:
            itemdict[collapse] = False
        # /C: only a 3-element array is taken over as color.
        col = mupdf.pdf_dict_get(bm, PDF_NAME('C'))
        if mupdf.pdf_is_array(col) and mupdf.pdf_array_len(col) == 3:
            color = (
                mupdf.pdf_to_real(mupdf.pdf_array_get(col, 0)),
                mupdf.pdf_to_real(mupdf.pdf_array_get(col, 1)),
                mupdf.pdf_to_real(mupdf.pdf_array_get(col, 2)),
            )
            itemdict[dictkey_color] = color
        # Zoom: element 4 of a 5-element destination array, taken from
        # /Dest or else from /A/D; 0 otherwise.
        z=0
        obj = mupdf.pdf_dict_get(bm, PDF_NAME('Dest'))
        if not obj.m_internal or not mupdf.pdf_is_array(obj):
            obj = mupdf.pdf_dict_getl(bm, PDF_NAME('A'), PDF_NAME('D'))
        if mupdf.pdf_is_array(obj) and mupdf.pdf_array_len(obj) == 5:
            z = mupdf.pdf_to_real(mupdf.pdf_array_get(obj, 4))
        itemdict[zoom] = float(z)
        item[3] = itemdict
        items[i] = item
+def _forget_page(self, page: Page):
+"""Remove a page from document page dict."""
+pid = id(page)
+if pid in self._page_refs:
+#self._page_refs[pid] = None
+del self._page_refs[pid]
def _get_char_widths(self, xref: int, bfname: str, ext: str, ordering: int, limit: int, idx: int = 0):
    """Return a list of (glyph, advance) pairs for a font.

    Args:
        xref: xref of the font; used when the font must be read from the
            PDF with JM_get_fontbuffer().
        bfname: font name, looked up as a Base-14 font when `ordering`
            is negative.
        ext: not used by this method.
        ordering: if >= 0, the font is looked up with fz_lookup_cjk_font().
        limit: number of character codes to cover, at least 256.
        idx: font index passed to fz_new_font_from_buffer().

    Raises:
        Exception: no font buffer is available for `xref`.
    """
    pdf = _as_pdf_document(self)
    code_count = max(limit, 256)
    is_cjk = ordering >= 0

    if is_cjk:
        data, size, index = mupdf.fz_lookup_cjk_font(ordering)
        font = mupdf.fz_new_font_from_memory(None, data, size, index, 0)
    else:
        data, size = mupdf.fz_lookup_base14_font(bfname)
        if data:
            font = mupdf.fz_new_font_from_memory(bfname, data, size, 0, 0)
        else:
            fontbuffer = JM_get_fontbuffer(pdf, xref)
            if not fontbuffer.m_internal:
                raise Exception("font at xref %d is not supported" % xref)
            font = mupdf.fz_new_font_from_buffer(None, fontbuffer, idx, 0)

    widths = []
    for code in range(code_count):
        glyph = mupdf.fz_encode_character(font, code)
        advance = mupdf.fz_advance_glyph(font, glyph, 0)
        if is_cjk:
            # For these fonts the character code itself is reported.
            glyph = code
        widths.append((glyph, advance if glyph > 0 else 0.0))
    return widths
def _get_page_labels(self):
    """Return the list filled by JM_get_page_labels() from /Root/PageLabels.

    Three layouts are tried in this order: a direct /Nums object, a
    /Kids/Nums object, and a /Kids array whose elements each have /Nums.
    The list is empty if there is no /PageLabels entry.
    """
    pdf = _as_pdf_document(self)
    labels = []
    label_root = mupdf.pdf_dict_getl(
        mupdf.pdf_trailer(pdf),
        PDF_NAME('Root'),
        mupdf.pdf_new_name("PageLabels"),
    )
    if not label_root.m_internal:
        return labels

    # simple case: direct /Nums object
    nums = mupdf.pdf_resolve_indirect( mupdf.pdf_dict_get( label_root, PDF_NAME('Nums')))
    if not nums.m_internal:
        # case: /Kids/Nums
        nums = mupdf.pdf_resolve_indirect(
            mupdf.pdf_dict_getl(label_root, PDF_NAME('Kids'), PDF_NAME('Nums'))
        )
    if nums.m_internal:
        JM_get_page_labels(labels, nums)
        return labels

    # case: /Kids is an array of multiple /Nums
    kids = mupdf.pdf_resolve_indirect( mupdf.pdf_dict_get( label_root, PDF_NAME('Kids')))
    if not kids.m_internal or not mupdf.pdf_is_array(kids):
        return labels
    for position in range(mupdf.pdf_array_len(kids)):
        kid = mupdf.pdf_array_get(kids, position)
        kid_nums = mupdf.pdf_resolve_indirect(mupdf.pdf_dict_get(kid, PDF_NAME('Nums')))
        JM_get_page_labels(labels, kid_nums)
    return labels
def _getMetadata(self, key):
    """Get metadata.

    Returns the result of mupdf.fz_lookup_metadata2() for `key`, or an
    empty string if that call raises an exception.
    """
    try:
        value = mupdf.fz_lookup_metadata2( self.this, key)
    except Exception:
        if g_exceptions_verbose > 2:    exception_info()
        return ''
    return value
def _getOLRootNumber(self):
    """Get xref of Outline Root, create it if missing.

    Raises:
        ValueError: document closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    catalog = mupdf.pdf_dict_get( mupdf.pdf_trailer( pdf), PDF_NAME('Root'))
    outlines = mupdf.pdf_dict_get( catalog, PDF_NAME('Outlines'))
    if outlines.m_internal:
        return mupdf.pdf_to_num( outlines)

    # No outline root yet: add a new /Type /Outlines dictionary to the
    # PDF, enter it in the catalog and read it back from there.
    fresh = mupdf.pdf_new_dict( pdf, 4)
    mupdf.pdf_dict_put( fresh, PDF_NAME('Type'), PDF_NAME('Outlines'))
    reference = mupdf.pdf_add_object( pdf, fresh)
    mupdf.pdf_dict_put( catalog, PDF_NAME('Outlines'), reference)
    outlines = mupdf.pdf_dict_get( catalog, PDF_NAME('Outlines'))
    return mupdf.pdf_to_num( outlines)
def _getPDFfileid(self):
    """Get PDF file id.

    Returns:
        None when _as_pdf_document() delivers no PDF. Otherwise a list
        with the hexlified elements of the trailer's /ID array (empty if
        there is no /ID).
    """
    pdf = _as_pdf_document(self, required=0)
    if not pdf.m_internal:
        return
    ids = []
    id_array = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('ID'))
    if id_array.m_internal:
        for position in range(mupdf.pdf_array_len(id_array)):
            element = mupdf.pdf_array_get(id_array, position)
            # NOTE(review): binascii.hexlify() requires a bytes-like
            # argument - confirm what pdf_to_text_string() returns here.
            text = mupdf.pdf_to_text_string(element)
            ids.append(binascii.hexlify(text))
    return ids
def _getPageInfo(self, pno, what):
    """List fonts, images, XObjects used on a page.

    Args:
        pno: page number; negative values count from the end.
        what: passed on to JM_scan_resources() to select what is listed.

    Returns:
        The list filled by JM_scan_resources(); empty if the page has no
        (inheritable) /Resources.

    Raises:
        ValueError: document closed or encrypted, or bad page number.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    doc = self.this
    pageCount = mupdf.pdf_count_pages(doc) if isinstance(doc, mupdf.PdfDocument) else mupdf.fz_count_pages(doc)
    if pageCount <= 0:
        # Without pages no page number is valid. This also keeps the
        # normalisation loop below from running forever.
        raise ValueError( MSG_BAD_PAGENO)
    n = pno  # pno < 0 is allowed
    while n < 0:
        n += pageCount  # make it non-negative
    if n >= pageCount:
        raise ValueError( MSG_BAD_PAGENO)
    pdf = _as_pdf_document(self)
    pageref = mupdf.pdf_lookup_page_obj(pdf, n)
    rsrc = mupdf.pdf_dict_get_inheritable(pageref, mupdf.PDF_ENUM_NAME_Resources)
    liste = []
    tracer = []
    if rsrc.m_internal:
        JM_scan_resources(pdf, rsrc, liste, what, 0, tracer)
    return liste
def _insert_font(self, fontfile=None, fontbuffer=None):
    '''
    Utility: insert font from file or binary.

    Returns the result of JM_insert_font(). Raises ValueError if neither
    `fontfile` nor `fontbuffer` is given.
    '''
    pdf = _as_pdf_document(self)
    if not (fontfile or fontbuffer):
        raise ValueError( MSG_FILE_OR_BUFFER)
    return JM_insert_font(pdf, None, fontfile, fontbuffer, 0, 0, 0, 0, 0, -1)
def _loadOutline(self):
    """Load first outline.

    Returns an Outline wrapping the result of fz_load_outline(), or None
    if loading the outline raised an exception.
    """
    fz_doc = self.this
    assert isinstance( fz_doc, mupdf.FzDocument)
    try:
        outline = mupdf.fz_load_outline( fz_doc)
    except Exception:
        # Best effort: the failure is only reported at high verbosity.
        if g_exceptions_verbose > 1:
            exception_info()
        return None
    else:
        return Outline( outline)
+def _make_page_map(self):
+"""Make an array page number -> page object."""
+if self.is_closed:
+raise ValueError("document closed")
+assert 0, f'_make_page_map() is no-op'
def _move_copy_page(self, pno, nb, before, copy):
    """Move or copy a PDF page reference.

    Args:
        pno: number of the source page.
        nb: number of the page relative to which the source is placed.
        before: if true insert in front of page `nb`, else behind it.
        copy: if true the source reference is kept (copy), else it is
            removed from its old position (move).
    """
    pdf = _as_pdf_document(self)

    def adjust_counts(node, delta):
        # Walk up the /Parent chain and add `delta` to every /Count.
        while node.m_internal:
            current = mupdf.pdf_dict_get_int( node, PDF_NAME('Count'))
            mupdf.pdf_dict_put_int( node, PDF_NAME('Count'), current + delta)
            node = mupdf.pdf_dict_get( node, PDF_NAME('Parent'))

    # Locate both pages: page object, parent node and index in its /Kids.
    page1, parent1, i1 = pdf_lookup_page_loc( pdf, pno)
    kids1 = mupdf.pdf_dict_get( parent1, PDF_NAME('Kids'))
    page2, parent2, i2 = pdf_lookup_page_loc( pdf, nb)
    kids2 = mupdf.pdf_dict_get( parent2, PDF_NAME('Kids'))

    # Index of the source page in the target /Kids array.
    pos = i2 if before else i2 + 1

    # pdf_objcmp() yields 0 when both pages live in the same /Kids array.
    different_kids = mupdf.pdf_objcmp( kids1, kids2) != 0

    # A moved page entering another /Kids array needs its /Parent updated.
    if different_kids and not copy:
        mupdf.pdf_dict_put( page1, PDF_NAME('Parent'), parent2)
    mupdf.pdf_array_insert( kids2, page1, pos)

    if different_kids:
        adjust_counts(parent2, +1)
        if not copy:
            # Drop the original entry and shrink the source branch.
            mupdf.pdf_array_delete( kids1, i1)
            adjust_counts(parent1, -1)
    elif copy:
        # Same /Kids array, one more page reference in it.
        adjust_counts(parent2, +1)
    elif i1 < pos:
        mupdf.pdf_array_delete( kids1, i1)
    else:
        # The insertion above shifted the original entry by one.
        mupdf.pdf_array_delete( kids1, i1 + 1)

    if pdf.m_internal.rev_page_map: # page map no longer valid: drop it
        mupdf.ll_pdf_drop_page_tree( pdf.m_internal)
    self._reset_page_refs()
def _newPage(self, pno=-1, width=595, height=842):
    """Make a new PDF page.

    Inserts an empty page of size `width` x `height` at position `pno`
    and returns `self[pno]`.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    if not g_use_extra:
        pdf = _as_pdf_document(self)
        # Unit rectangle stretched to the requested page size.
        mediabox = mupdf.FzRect(mupdf.FzRect.Fixed_UNIT)
        mediabox.x1 = width
        mediabox.y1 = height
        contents = mupdf.FzBuffer()
        if pno < -1:
            raise ValueError( MSG_BAD_PAGENO)
        # create /Resources and /Contents objects
        resources = mupdf.pdf_add_new_dict(pdf, 1)
        page_obj = mupdf.pdf_add_page( pdf, mediabox, 0, resources, contents)
        mupdf.pdf_insert_page( pdf, pno, page_obj)
    else:
        extra._newPage( self.this, pno, width, height)
    # fixme: pdf->dirty = 1;
    self._reset_page_refs()
    return self[pno]
def _remove_links_to(self, numbers):
    """Hand the given page numbers to _remove_dest_range() for this PDF."""
    _remove_dest_range(_as_pdf_document(self), numbers)
def _remove_toc_item(self, xref):
    """"Remove" a bookmark by letting it point to nowhere.

    Deletes the /Dest and /A entries of outline item `xref` and sets its
    /C color entry to three components of 0.8.
    """
    pdf = _as_pdf_document(self)
    item = mupdf.pdf_new_indirect(pdf, xref, 0)
    for key in ('Dest', 'A'):
        mupdf.pdf_dict_del( item, PDF_NAME(key))
    gray = mupdf.pdf_new_array( pdf, 3)
    for _ in range(3):
        mupdf.pdf_array_push_real( gray, 0.8)
    mupdf.pdf_dict_put( item, PDF_NAME('C'), gray)
+def _reset_page_refs(self):
+"""Invalidate all pages in document dictionary."""
+if getattr(self, "is_closed", True):
+return
+pages = [p for p in self._page_refs.values()]
+for page in pages:
+if page:
+page._erase()
+page = None
+self._page_refs.clear()
def _set_page_labels(self, labels):
    """Replace the catalog's /PageLabels entry.

    `labels` is inserted verbatim between the brackets of the /Nums array
    in the catalog's object source.
    """
    pdf = _as_pdf_document(self)
    key = mupdf.pdf_new_name("PageLabels")
    catalog = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
    # Start from a fresh /PageLabels entry holding an empty /Nums array ...
    mupdf.pdf_dict_del(catalog, key)
    mupdf.pdf_dict_putl(catalog, mupdf.pdf_new_array(pdf, 0), key, PDF_NAME('Nums'))
    # ... then patch the labels into the catalog's textual definition.
    xref = self.pdf_catalog()
    source = self.xref_object(xref, compressed=True)
    source = source.replace("/Nums[]", "/Nums[%s]" % labels)
    self.update_object(xref, source)
def _update_toc_item(self, xref, action=None, title=None, flags=0, collapse=None, color=None):
    '''
    Update selected properties of the outline item with the given xref.

    Args:
        title: if truthy, becomes the new /Title.
        action: if truthy, /Dest is removed and the object built from
            this string is stored as /A.
        flags: always written to /F.
        color: a truthy value supplies three components for /C; a falsy
            value other than None removes /C.
        collapse: True / False flips the sign of an existing /Count so
            that it is negative / positive; None leaves it untouched.
    '''
    pdf = _as_pdf_document(self)
    item = mupdf.pdf_new_indirect( pdf, xref, 0)
    if title:
        mupdf.pdf_dict_put_text_string( item, PDF_NAME('Title'), title)
    if action:
        mupdf.pdf_dict_del( item, PDF_NAME('Dest'))
        action_obj = JM_pdf_obj_from_str( pdf, action)
        mupdf.pdf_dict_put( item, PDF_NAME('A'), action_obj)
    mupdf.pdf_dict_put_int( item, PDF_NAME('F'), flags)
    if color:
        components = mupdf.pdf_new_array( pdf, 3)
        for index in range(3):
            mupdf.pdf_array_push_real( components, color[index])
        mupdf.pdf_dict_put( item, PDF_NAME('C'), components)
    elif color is not None:
        mupdf.pdf_dict_del( item, PDF_NAME('C'))
    if collapse is None:
        return
    if not mupdf.pdf_dict_get( item, PDF_NAME('Count')).m_internal:
        return
    count = mupdf.pdf_dict_get_int( item, PDF_NAME('Count'))
    must_expand = count < 0 and collapse is False
    must_collapse = count > 0 and collapse is True
    if must_expand or must_collapse:
        mupdf.pdf_dict_put_int( item, PDF_NAME('Count'), count * (-1))
@property
def FormFonts(self):
    """Get list of field font resource names.

    Returns None if the document is not a PDF, else the key names of the
    catalog's /AcroForm/DR/Font dictionary (empty list if absent).
    """
    pdf = _as_pdf_document(self, required=0)
    if not pdf.m_internal:
        return
    fonts = mupdf.pdf_dict_getl(
            mupdf.pdf_trailer(pdf),
            PDF_NAME('Root'),
            PDF_NAME('AcroForm'),
            PDF_NAME('DR'),
            PDF_NAME('Font'),
            )
    names = []
    if not (fonts.m_internal and mupdf.pdf_is_dict(fonts)):
        return names    # no fonts exist
    for index in range(mupdf.pdf_dict_len(fonts)):
        key = mupdf.pdf_dict_get_key(fonts, index)
        names.append(JM_UnicodeFromStr(mupdf.pdf_to_name(key)))
    return names
def add_layer(self, name, creator=None, on=None):
    """Add a new OC layer.

    The arguments are handed to JM_add_layer_config(); afterwards MuPDF
    is asked to re-read the optional content information.
    """
    pdf = _as_pdf_document(self)
    JM_add_layer_config( pdf, name, creator, on)
    mupdf.ll_pdf_read_ocg( pdf.m_internal)
def add_ocg(self, name, config=-1, on=1, intent=None, usage=None):
    """Add new optional content group.

    Args:
        name: text stored as the /Name of the OCG.
        config: index into the /Configs array of /OCProperties, or -1 to
            use the default configuration /D.
        on: if true the OCG is added to the configuration's /ON array,
            otherwise to its /OFF array.
        intent: a name string or a sequence of name strings for /Intent;
            if empty, 'View' is used.
        usage: /Subtype name of the /Usage /CreatorInfo dictionary; if
            empty, 'Artwork' is used.
    Returns:
        The xref of the new OCG.
    Raises:
        ValueError: `config` does not address an existing configuration.
    """
    pdf = _as_pdf_document(self)

    # make the OCG
    ocg = mupdf.pdf_add_new_dict(pdf, 3)
    mupdf.pdf_dict_put(ocg, PDF_NAME('Type'), PDF_NAME('OCG'))
    mupdf.pdf_dict_put_text_string(ocg, PDF_NAME('Name'), name)
    intents = mupdf.pdf_dict_put_array(ocg, PDF_NAME('Intent'), 2)
    if not intent:
        mupdf.pdf_array_push(intents, PDF_NAME('View'))
    elif isinstance(intent, str):
        mupdf.pdf_array_push(intents, mupdf.pdf_new_name(intent))
    else:
        # A sequence of intent names: push every non-empty item. This
        # branch used to be an unconditional "assert 0, 'fixme ...'".
        for item in intent:
            if item:
                mupdf.pdf_array_push(intents, mupdf.pdf_new_name(item))
    use_for = mupdf.pdf_dict_put_dict(ocg, PDF_NAME('Usage'), 3)
    ci_name = mupdf.pdf_new_name("CreatorInfo")
    cre_info = mupdf.pdf_dict_put_dict(use_for, ci_name, 2)
    mupdf.pdf_dict_put_text_string(cre_info, PDF_NAME('Creator'), "PyMuPDF")
    mupdf.pdf_dict_put_name(cre_info, PDF_NAME('Subtype'), usage if usage else "Artwork")
    indocg = mupdf.pdf_add_object(pdf, ocg)

    # Insert OCG in the right config
    ocp = JM_ensure_ocproperties(pdf)
    obj = mupdf.pdf_dict_get(ocp, PDF_NAME('OCGs'))
    mupdf.pdf_array_push(obj, indocg)

    if config > -1:
        obj = mupdf.pdf_dict_get(ocp, PDF_NAME('Configs'))
        if not mupdf.pdf_is_array(obj):
            raise ValueError( MSG_BAD_OC_CONFIG)
        cfg = mupdf.pdf_array_get(obj, config)
        if not cfg.m_internal:
            raise ValueError( MSG_BAD_OC_CONFIG)
    else:
        cfg = mupdf.pdf_dict_get(ocp, PDF_NAME('D'))

    # register the OCG in /Order, creating the array if needed
    obj = mupdf.pdf_dict_get(cfg, PDF_NAME('Order'))
    if not obj.m_internal:
        obj = mupdf.pdf_dict_put_array(cfg, PDF_NAME('Order'), 1)
    mupdf.pdf_array_push(obj, indocg)

    # register the OCG in /ON or /OFF, creating the array if needed
    state_key = 'ON' if on else 'OFF'
    obj = mupdf.pdf_dict_get(cfg, PDF_NAME(state_key))
    if not obj.m_internal:
        obj = mupdf.pdf_dict_put_array(cfg, PDF_NAME(state_key), 1)
    mupdf.pdf_array_push(obj, indocg)

    # let MuPDF take note: re-read OCProperties
    mupdf.ll_pdf_read_ocg(pdf.m_internal)

    return mupdf.pdf_to_num(indocg)
def authenticate(self, password):
    """Decrypt document.

    Returns the result of fz_authenticate_password(). If it is truthy,
    the document is marked as no longer encrypted and (re-)initialized
    via init_doc().

    Raises:
        ValueError: the document is closed.
    """
    if self.is_closed:
        raise ValueError("document closed")
    val = mupdf.fz_authenticate_password(self.this, password)
    if val:  # the doc is decrypted successfully and we init the outline
        self.is_encrypted = False   # (was assigned twice in a row)
        self.init_doc()
        self.thisown = True
    return val
def can_save_incrementally(self):
    """Check whether incremental saves are possible.

    Always False for documents that are not PDFs.
    """
    pdf = _as_pdf_document(self, required=0)
    if pdf.m_internal:
        return mupdf.pdf_can_be_saved_incrementally(pdf)
    return False
def bake(self, *, annots: bool = True, widgets: bool = True) -> None:
    """Convert annotations or fields to permanent content.

    Notes:
        Annotations and / or widgets are turned into permanent page
        content, like text and vector graphics, as appropriate.
        Afterwards pages still look the same, but no longer have
        annotations, respectively no fields.
        If widgets are selected the PDF will no longer be a Form PDF.

    Args:
        annots: convert annotations
        widgets: convert form fields
    """
    bake_annots = int(annots)
    bake_widgets = int(widgets)
    mupdf.pdf_bake_document(_as_pdf_document(self), bake_annots, bake_widgets)
@property
def chapter_count(self):
    """Number of chapters.

    Raises ValueError if the document is closed.
    """
    if not self.is_closed:
        return mupdf.fz_count_chapters( self.this)
    raise ValueError("document closed")
def chapter_page_count(self, chapter):
    """Page count of chapter.

    Raises ValueError if the document is closed or `chapter` is not a
    valid chapter number.
    """
    if self.is_closed:
        raise ValueError("document closed")
    total_chapters = mupdf.fz_count_chapters( self.this)
    if not 0 <= chapter < total_chapters:
        raise ValueError( "bad chapter number")
    return mupdf.fz_count_chapter_pages( self.this, chapter)
def close(self):
    """Close document.

    Drops the outline, invalidates all page references, clears the
    graft maps and releases the underlying document object.

    Raises:
        ValueError: the document is already closed.
    """
    if getattr(self, "is_closed", True):
        raise ValueError("document closed")
    if getattr(self, "_outline", None):
        self._outline = None
    # Pages must be invalidated while the document still counts as open.
    self._reset_page_refs()
    self.is_closed = True
    self.Graftmaps = {} # Fixes test_3140().
    self.this = None
def convert_to_pdf(self, from_page=0, to_page=-1, rotate=0):
    """Convert document to a PDF, selecting page range and optional rotation. Output bytes object.

    Page numbers outside the document are clamped; a negative `to_page`
    means the last page. MuPDF warnings stored during the conversion are
    reported via message().
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    fz_doc = self.this
    last = mupdf.fz_count_pages(fz_doc) - 1
    first_page = min(max(from_page, 0), last)
    last_page = last if to_page < 0 else min(to_page, last)
    # Remember how many warnings existed before converting ...
    warnings_before = len(JM_mupdf_warnings_store)
    doc = JM_convert_to_pdf(fz_doc, first_page, last_page, rotate)
    # ... so that only the new ones are shown.
    for warning in JM_mupdf_warnings_store[warnings_before:len(JM_mupdf_warnings_store)]:
        message(f'{warning}')
    return doc
def copy_page(self, pno: int, to: int =-1):
    """Copy a page within a PDF document.

    This will only create another reference of the same page object.

    Args:
        pno: source page number
        to: put before this page, '-1' means after last page.
    Raises:
        ValueError: document closed or a page number is out of range.
    """
    if self.is_closed:
        raise ValueError("document closed")

    page_count = len(self)
    if not (pno in range(page_count) and to in range(-1, page_count)):
        raise ValueError("bad page number(s)")

    if to == -1:
        # "after last page": insert behind the current last page
        return self._move_copy_page(pno, page_count - 1, 0, 1)
    return self._move_copy_page(pno, to, 1, 1)
def del_xml_metadata(self):
    """Delete XML metadata.

    Removes the /Metadata entry from the PDF catalog, if there is a
    catalog.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    catalog = mupdf.pdf_dict_get( mupdf.pdf_trailer( pdf), PDF_NAME('Root'))
    if not catalog.m_internal:
        return
    mupdf.pdf_dict_del( catalog, PDF_NAME('Metadata'))
def delete_page(self, pno: int =-1):
    """Delete one page from a PDF.

    Thin wrapper: passes `pno` as the single argument to delete_pages()
    and returns its result.
    """
    result = self.delete_pages(pno)
    return result
def delete_pages(self, *args, **kw):
    """Delete pages from a PDF.

    Args:
        Either keywords 'from_page'/'to_page', or two integers to
        specify the first/last page to delete.
        Or a list/tuple/range object, which can contain arbitrary
        page numbers.
        Or a single integer page number.
    Raises:
        ValueError: not a PDF, document closed, wrong number or mix of
            arguments, or page numbers outside the document.
    """
    if not self.is_pdf:
        raise ValueError("is no PDF")
    if self.is_closed:
        raise ValueError("document closed")

    page_count = self.page_count  # page count of document

    def from_end(pno):
        # Negative numbers count backwards from the end. Without pages
        # there is nothing to count from; the former
        # "while pno < 0: pno += page_count" looped forever in that case.
        if pno < 0:
            if page_count <= 0:
                raise ValueError("bad page number(s)")
            pno %= page_count
        return pno

    if kw:  # check if keywords were used
        if args:  # then no positional args are allowed
            raise ValueError("cannot mix keyword and positional argument")
        f = from_end(kw.get("from_page", -1))  # first page to delete
        t = from_end(kw.get("to_page", -1))  # last page to delete
        if not f <= t < page_count:
            raise ValueError("bad page number(s)")
        numbers = tuple(range(f, t + 1))
    else:
        # `args` is a tuple, so the former test "args == []" never fired
        # and a call without arguments failed with IndexError instead.
        if len(args) > 2 or not args:
            raise ValueError("need 1 or 2 positional arguments")
        if len(args) == 2:
            f, t = args
            if not (type(f) is int and type(t) is int):
                raise ValueError("both arguments must be int")
            if f > t:
                f, t = t, f
            if not f <= t < page_count:
                raise ValueError("bad page number(s)")
            numbers = tuple(range(f, t + 1))
        elif isinstance(args[0], int):
            numbers = (from_end(args[0]),)
        else:
            numbers = tuple(args[0])

    numbers = sorted(map(int, set(numbers)))  # ensure unique integers
    if not numbers:
        message("nothing to delete")
        return
    if numbers[0] < 0 or numbers[-1] >= page_count:
        raise ValueError("bad page number(s)")

    frozen_numbers = frozenset(numbers)
    # Neutralize TOC items that point to one of the deleted pages
    # (TOC entries carry 1-based page numbers).
    toc = self.get_toc()
    for i, xref in enumerate(self.get_outline_xrefs()):
        if toc[i][2] - 1 in frozen_numbers:
            self._remove_toc_item(xref)  # remove target in PDF object
    self._remove_links_to(frozen_numbers)

    for i in reversed(numbers):  # delete pages, last to first
        self._delete_page(i)

    self._reset_page_refs()
def embfile_add(self,
        name: str,
        buffer_: ByteString,
        filename: OptStr =None,
        ufilename: OptStr =None,
        desc: OptStr =None,
        ) -> int:
    """Add an item to the EmbeddedFiles array.

    Args:
        name: name of the new item, must not already exist.
        buffer_: (binary data) the file content.
        filename: (str) the file name, default: the name
        ufilename: (unicode) the file name, default: filename
        desc: (str) the description.
    Returns:
        The xref delivered by `_embfile_add()` for the new entry. (The
        signature used to claim `None` although the xref was returned.)
    Raises:
        ValueError: an embedded file called `name` already exists.
    """
    if name in self.embfile_names():
        raise ValueError("Name '%s' already exists." % str(name))

    # Fill in the documented defaults.
    if filename is None:
        filename = name
    if ufilename is None:
        ufilename = filename
    if desc is None:
        desc = name
    xref = self._embfile_add(
            name,
            buffer_=buffer_,
            filename=filename,
            ufilename=ufilename,
            desc=desc,
            )
    # Stamp the new object with its type and identical creation /
    # modification dates.
    date = get_pdf_now()
    self.xref_set_key(xref, "Type", "/EmbeddedFile")
    self.xref_set_key(xref, "Params/CreationDate", get_pdf_str(date))
    self.xref_set_key(xref, "Params/ModDate", get_pdf_str(date))
    return xref
def embfile_count(self) -> int:
    """Get number of EmbeddedFiles (the length of embfile_names())."""
    names = self.embfile_names()
    return len(names)
def embfile_del(self, item: typing.Union[int, str]):
    """Delete an entry from EmbeddedFiles.

    Notes:
        The argument must be name or index of an EmbeddedFiles item.
        Physical deletion of data will happen on save to a new
        file with appropriate garbage option.
    Args:
        item: name or number of item.
    Returns:
        The result of the internal `_embfile_del()` call.
    """
    return self._embfile_del(self._embeddedFileIndex(item))
def embfile_get(self, item: typing.Union[int, str]) -> bytes:
    """Get the content of an item in the EmbeddedFiles array.

    Args:
        item: number or name of item.
    Returns:
        (bytes) The file content.
    """
    return self._embeddedFileGet(self._embeddedFileIndex(item))
def embfile_info(self, item: typing.Union[int, str]) -> dict:
    """Get information of an item in the EmbeddedFiles array.

    Args:
        item: number or name of item.
    Returns:
        Information dictionary. Besides "name" and whatever
        `_embfile_info()` stores, it gets "creationDate", "modDate" and
        "checksum" (hex string) for those /Params keys that are present.
    """
    idx = self._embeddedFileIndex(item)
    infodict = {"name": self.embfile_names()[idx]}
    xref = self._embfile_info(idx, infodict)

    kind, value = self.xref_get_key(xref, "Params/CreationDate")
    if kind != "null":
        infodict["creationDate"] = value

    kind, value = self.xref_get_key(xref, "Params/ModDate")
    if kind != "null":
        infodict["modDate"] = value

    kind, value = self.xref_get_key(xref, "Params/CheckSum")
    if kind != "null":
        # the checksum is reported in hexadecimal notation
        checksum = binascii.hexlify(value.encode())
        infodict["checksum"] = checksum.decode()
    return infodict
def embfile_names(self) -> list:
    """Get list of names of EmbeddedFiles.

    The list is filled in place by the internal `_embfile_names()`.
    """
    names: list = []
    self._embfile_names(names)
    return names
def embfile_upd(self,
        item: typing.Union[int, str],
        buffer_: OptBytes =None,
        filename: OptStr =None,
        ufilename: OptStr =None,
        desc: OptStr =None,
        ) -> int:
    """Change an item of the EmbeddedFiles array.

    Notes:
        The optional parameters are handed unchanged to the internal
        `_embfile_upd()`; the modification date is refreshed in any case.
    Args:
        item: number or name of item.
        buffer_: (binary data) the new file content.
        filename: (str) the new file name.
        ufilename: (unicode) the new file name.
        desc: (str) the new description.
    Returns:
        The xref delivered by `_embfile_upd()`. (The signature used to
        claim `None` although the xref was returned.)
    """
    idx = self._embeddedFileIndex(item)
    xref = self._embfile_upd(
            idx,
            buffer_=buffer_,
            filename=filename,
            ufilename=ufilename,
            desc=desc,
            )
    # record the time of this change
    date = get_pdf_now()
    self.xref_set_key(xref, "Params/ModDate", get_pdf_str(date))
    return xref
def extract_font(self, xref=0, info_only=0, named=None):
    '''
    Get a font by xref. Returns a tuple or dictionary.

    The four items are font name, file extension, font subtype and the
    font file content. They are all empty if the object is not a /Font
    or is a CIDFontType descendant font. The content is also empty if
    `info_only` is true or the extension is 'n/a'.
    With a truthy `named`, a dictionary keyed by dictkey_name,
    dictkey_ext, dictkey_type and dictkey_content is returned instead of
    the tuple.
    '''
    pdf = _as_pdf_document(self)
    obj = mupdf.pdf_load_object(pdf, xref)
    type_ = mupdf.pdf_dict_get(obj, PDF_NAME('Type'))
    subtype = mupdf.pdf_dict_get(obj, PDF_NAME('Subtype'))
    usable = (
            mupdf.pdf_name_eq(type_, PDF_NAME('Font'))
            and not mupdf.pdf_to_name( subtype).startswith('CIDFontType')
            )
    if usable:
        # prefer /BaseFont, fall back to /Name
        bname = mupdf.pdf_dict_get(obj, PDF_NAME('BaseFont'))
        if not bname.m_internal or mupdf.pdf_is_null(bname):
            bname = mupdf.pdf_dict_get(obj, PDF_NAME('Name'))
        ext = JM_get_fontextension(pdf, xref)
        if ext != 'n/a' and not info_only:
            content = JM_BinFromBuffer(JM_get_fontbuffer(pdf, xref))
        else:
            content = b''
        fields = (
                JM_EscapeStrFromStr(mupdf.pdf_to_name(bname)),
                JM_UnicodeFromStr(ext),
                JM_UnicodeFromStr(mupdf.pdf_to_name(subtype)),
                content,
                )
    else:
        fields = ('', '', '', b'')
    if not named:
        return fields
    keys = (dictkey_name, dictkey_ext, dictkey_type, dictkey_content)
    return dict(zip(keys, fields))
def extract_image(self, xref):
    """Get image by xref. Returns a dictionary.

    The dictionary is filled by _make_image_dict() and extended with the
    xref of the image's /SMask or /Mask (0 if none) and the name of its
    colorspace.

    Raises:
        ValueError: document closed or encrypted, xref out of range, or
            the object is not an image.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")

    pdf = _as_pdf_document(self)
    if not _INRANGE(xref, 1, mupdf.pdf_xref_len(pdf)-1):
        raise ValueError( MSG_BAD_XREF)

    obj = mupdf.pdf_new_indirect(pdf, xref, 0)
    if not mupdf.pdf_name_eq(mupdf.pdf_dict_get(obj, PDF_NAME('Subtype')), PDF_NAME('Image')):
        raise ValueError( "not an image")

    mask = mupdf.pdf_dict_geta(obj, PDF_NAME('SMask'), PDF_NAME('Mask'))
    smask = mupdf.pdf_to_num(mask) if mask.m_internal else 0

    # load the image
    img = mupdf.pdf_load_image(pdf, obj)
    result = {}
    _make_image_dict(img, result)
    result[dictkey_smask] = smask
    result[dictkey_cs_name] = mupdf.fz_colorspace_name(img.colorspace())
    return result
def ez_save(
        self,
        filename,
        garbage=3,
        clean=False,
        deflate=True,
        deflate_images=True,
        deflate_fonts=True,
        incremental=False,
        ascii=False,
        expand=False,
        linear=False,
        pretty=False,
        encryption=1,
        permissions=4095,
        owner_pw=None,
        user_pw=None,
        no_new_id=True,
        preserve_metadata=1,
        use_objstms=1,
        compression_effort=0,
        ):
    '''
    Save PDF using some different defaults.

    Every argument is forwarded under the same name to save(), whose
    result is returned.
    '''
    options = {
            "garbage": garbage,
            "clean": clean,
            "deflate": deflate,
            "deflate_images": deflate_images,
            "deflate_fonts": deflate_fonts,
            "incremental": incremental,
            "ascii": ascii,
            "expand": expand,
            "linear": linear,
            "pretty": pretty,
            "encryption": encryption,
            "permissions": permissions,
            "owner_pw": owner_pw,
            "user_pw": user_pw,
            "no_new_id": no_new_id,
            "preserve_metadata": preserve_metadata,
            "use_objstms": use_objstms,
            "compression_effort": compression_effort,
            }
    return self.save(filename, **options)
def find_bookmark(self, bm):
    """Find new location after layouting a document.

    Returns the (chapter, page) pair of the location looked up for
    bookmark `bm`.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    loc = mupdf.fz_lookup_bookmark2( self.this, bm)
    return (loc.chapter, loc.page)
def fullcopy_page(self, pno, to=-1):
    """Make a full page duplicate.

    Page `pno` is deep-copied including its contents and annotations
    (except Popup annotations and those with an /IRT entry) and inserted
    at position `to`.

    Raises:
        ValueError: `pno` or `to` is out of range.
    """
    pdf = _as_pdf_document(self)
    page_count = mupdf.pdf_count_pages( pdf)
    try:
        pno_ok = _INRANGE(pno, 0, page_count - 1)
        if not pno_ok or not _INRANGE(to, -1, page_count - 1):
            raise ValueError( MSG_BAD_PAGENO)

        source = mupdf.pdf_resolve_indirect( mupdf.pdf_lookup_page_obj( pdf, pno))
        target = mupdf.pdf_deep_copy_obj( source)
        old_annots = mupdf.pdf_dict_get( target, PDF_NAME('Annots'))

        # copy annotations, but remove Popup and IRT types
        if old_annots.m_internal:
            annot_count = mupdf.pdf_array_len( old_annots)
            new_annots = mupdf.pdf_new_array( pdf, annot_count)
            for index in range(annot_count):
                annot = mupdf.pdf_array_get( old_annots, index)
                subtype = mupdf.pdf_dict_get( annot, PDF_NAME('Subtype'))
                if mupdf.pdf_name_eq( subtype, PDF_NAME('Popup')):
                    continue
                if mupdf.pdf_dict_gets( annot, "IRT").m_internal:
                    continue
                # store a deep copy under a new xref and refer to it
                duplicate = mupdf.pdf_deep_copy_obj( mupdf.pdf_resolve_indirect( annot))
                annot_xref = mupdf.pdf_create_object( pdf)
                mupdf.pdf_update_object( pdf, annot_xref, duplicate)
                duplicate = mupdf.pdf_new_indirect( pdf, annot_xref, 0)
                mupdf.pdf_dict_del( duplicate, PDF_NAME('Popup'))
                mupdf.pdf_dict_del( duplicate, PDF_NAME('P'))
                mupdf.pdf_array_push( new_annots, duplicate)
            mupdf.pdf_dict_put( target, PDF_NAME('Annots'), new_annots)

        # copy the old contents stream(s)
        old_contents = JM_read_contents( source)

        # create new /Contents object for the copy
        if old_contents and old_contents.m_internal:
            new_contents = mupdf.pdf_add_stream( pdf, mupdf.fz_new_buffer_from_copied_data( b" "), mupdf.PdfObj(), 0)
            JM_update_stream( pdf, new_contents, old_contents, 1)
            mupdf.pdf_dict_put( target, PDF_NAME('Contents'), new_contents)

        # now insert target page, making sure it is an indirect object
        page_xref = mupdf.pdf_create_object( pdf)   # get new xref
        mupdf.pdf_update_object( pdf, page_xref, target) # store new page
        target = mupdf.pdf_new_indirect( pdf, page_xref, 0)  # reread object
        mupdf.pdf_insert_page( pdf, to, target) # and store the page
    finally:
        # the page tree is dropped whether or not the copy succeeded
        mupdf.ll_pdf_drop_page_tree( pdf.m_internal)

    self._reset_page_refs()
def get_layer(self, config=-1):
    """Content of ON, OFF, RBGroups of an OC layer.

    Args:
        config: index into /OCProperties /Configs, or -1 for the
            default configuration /D.
    Returns:
        None if the PDF has no /OCProperties, else the result of
        JM_get_ocg_arrays() for the selected configuration.
    Raises:
        ValueError: the selected configuration does not exist.
    """
    pdf = _as_pdf_document(self)
    ocp = mupdf.pdf_dict_getl(
            mupdf.pdf_trailer( pdf),
            PDF_NAME('Root'),
            PDF_NAME('OCProperties'),
            )
    if not ocp.m_internal:
        return
    if config != -1:
        configs = mupdf.pdf_dict_get( ocp, PDF_NAME('Configs'))
        layer = mupdf.pdf_array_get( configs, config)
    else:
        layer = mupdf.pdf_dict_get( ocp, PDF_NAME('D'))
    if not layer.m_internal:
        raise ValueError( MSG_BAD_OC_CONFIG)
    return JM_get_ocg_arrays( layer)
def get_layers(self):
    """Show optional OC layers.

    Returns a list with one dictionary per layer configuration, having
    the keys "number", "name" and "creator".
    """
    pdf = _as_pdf_document(self)
    count = mupdf.pdf_count_layer_configs( pdf)
    if count == 1:
        # A single reported config only counts if /Configs is an array.
        configs = mupdf.pdf_dict_getl(
                mupdf.pdf_trailer( pdf),
                PDF_NAME('Root'),
                PDF_NAME('OCProperties'),
                PDF_NAME('Configs'),
                )
        if not mupdf.pdf_is_array( configs):
            count = 0
    layers = []
    info = mupdf.PdfLayerConfig()   # refilled for every configuration
    for number in range(count):
        mupdf.pdf_layer_config_info( pdf, number, info)
        layers.append( {
                "number": number,
                "name": info.name,
                "creator": info.creator,
                })
    return layers
def get_new_xref(self):
    """Make new xref.

    Returns the number of the object created by pdf_create_object().
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    ENSURE_OPERATION(pdf)
    return mupdf.pdf_create_object(pdf)
def get_ocgs(self):
    """Show existing optional content groups.

    Returns a dictionary keyed by OCG xref. Each value is a dictionary
    with the keys "name", "intent" (list of names), "on" and "usage".
    """
    creator_info = mupdf.pdf_new_name( "CreatorInfo")
    pdf = _as_pdf_document(self)
    ocgs = mupdf.pdf_dict_getl(
            mupdf.pdf_dict_get( mupdf.pdf_trailer( pdf), PDF_NAME('Root')),
            PDF_NAME('OCProperties'),
            PDF_NAME('OCGs'),
            )
    result = {}
    if not mupdf.pdf_is_array( ocgs):
        return result
    for index in range(mupdf.pdf_array_len( ocgs)):
        ocg = mupdf.pdf_array_get( ocgs, index)
        xref = mupdf.pdf_to_num( ocg)
        name = mupdf.pdf_to_text_string( mupdf.pdf_dict_get( ocg, PDF_NAME('Name')))

        subtype = mupdf.pdf_dict_getl( ocg, PDF_NAME('Usage'), creator_info, PDF_NAME('Subtype'))
        usage = mupdf.pdf_to_name( subtype) if subtype.m_internal else None

        # /Intent may be a single name or an array of names
        intents = []
        intent = mupdf.pdf_dict_get( ocg, PDF_NAME('Intent'))
        if intent.m_internal:
            if mupdf.pdf_is_name( intent):
                intents.append( mupdf.pdf_to_name( intent))
            elif mupdf.pdf_is_array( intent):
                for j in range(mupdf.pdf_array_len( intent)):
                    entry = mupdf.pdf_array_get( intent, j)
                    if mupdf.pdf_is_name( entry):
                        intents.append( mupdf.pdf_to_name( entry))

        hidden = mupdf.pdf_is_ocg_hidden( pdf, mupdf.PdfObj(), usage, ocg)
        result[ xref] = {
                "name": name,
                "intent": intents,
                "on": not hidden,
                "usage": usage,
                }
    return result
def get_outline_xrefs(self):
    """Get list of outline xref numbers.

    Returns an empty list if the document is not a PDF or has no
    outline items.
    """
    xrefs = []
    pdf = _as_pdf_document(self, required=0)
    if not pdf.m_internal:
        return xrefs
    # Descend Root -> Outlines -> First; stop where the chain breaks.
    node = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
    for key in ('Outlines', 'First'):
        if not node.m_internal:
            return xrefs
        node = mupdf.pdf_dict_get(node, PDF_NAME(key))
    if not node.m_internal:
        return xrefs
    return JM_outline_xrefs(node, xrefs)
def get_page_fonts(self, pno: int, full: bool =False) -> list:
    """Retrieve a list of fonts used on a page.

    Args:
        pno: page number, or an object with a `number` attribute.
        full: if false, the last item of every entry is dropped.
    Returns:
        The list of font entries; an empty tuple for non-PDF documents.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    if not self.is_pdf:
        return ()
    if type(pno) is not int:
        try:
            pno = pno.number
        except Exception:
            exception_info()
            raise ValueError("need a Page or page number")
    entries = self._getPageInfo(pno, 1)
    if full:
        return entries
    return [entry[:-1] for entry in entries]
def get_page_images(self, pno: int, full: bool =False) -> list:
    """Retrieve a list of images used on a page.

    Args:
        pno: page number.
        full: if false, the last item of every entry is dropped.
    Returns:
        The list of image entries; an empty tuple for non-PDF documents.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    if not self.is_pdf:
        return ()
    entries = self._getPageInfo(pno, 2)
    if full:
        return entries
    return [entry[:-1] for entry in entries]
def get_page_xobjects(self, pno: int) -> list:
    """Retrieve a list of XObjects used on a page.

    Returns an empty tuple for non-PDF documents.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    return self._getPageInfo(pno, 3) if self.is_pdf else ()
def get_sigflags(self):
    """Get the /SigFlags value.

    Returns -1 if the document is not a PDF or has no
    /AcroForm/SigFlags entry.
    """
    pdf = _as_pdf_document(self, required=0)
    if not pdf.m_internal:
        return -1   # not a PDF
    flags_obj = mupdf.pdf_dict_getl(
            mupdf.pdf_trailer(pdf),
            PDF_NAME('Root'),
            PDF_NAME('AcroForm'),
            PDF_NAME('SigFlags'),
            )
    if flags_obj.m_internal:
        return mupdf.pdf_to_int(flags_obj)
    return -1
def get_xml_metadata(self):
    """Get document XML metadata.

    Returns the decoded /Metadata stream of the catalog, or '' if the
    document is not a PDF or has no such entry.
    """
    pdf = _as_pdf_document(self, required=0)
    if not pdf.m_internal:
        return ''
    xml = mupdf.pdf_dict_getl(
            mupdf.pdf_trailer(pdf),
            PDF_NAME('Root'),
            PDF_NAME('Metadata'),
            )
    if xml is None or not xml.m_internal:
        return ''
    return JM_UnicodeFromBuffer(mupdf.pdf_load_stream(xml))
def init_doc(self):
    """Load the outline and the metadata dictionary of the document.

    Sets `self._outline` and `self.metadata`. The metadata value for
    'encryption' is None if the lookup delivers the string 'None'.

    Raises:
        ValueError: the document is still encrypted.
    """
    if self.is_encrypted:
        raise ValueError("cannot initialize - document still encrypted")
    self._outline = self._loadOutline()
    # metadata key -> key understood by _getMetadata()
    lookup_keys = {
            'format':'format',
            'title':'info:Title',
            'author':'info:Author',
            'subject':'info:Subject',
            'keywords':'info:Keywords',
            'creator':'info:Creator',
            'producer':'info:Producer',
            'creationDate':'info:CreationDate',
            'modDate':'info:ModDate',
            'trapped':'info:Trapped'
            }
    self.metadata = {k: self._getMetadata(v) for k, v in lookup_keys.items()}
    # Look the encryption up only once (it used to be fetched twice).
    encryption = self._getMetadata('encryption')
    self.metadata['encryption'] = None if encryption == 'None' else encryption
def insert_file(self,
        infile,
        from_page=-1,
        to_page=-1,
        start_at=-1,
        rotate=-1,
        links=True,
        annots=True,
        show_progress=0,
        final=1,
        ):
    '''
    Insert an arbitrary supported document to an existing PDF.

    The infile may be given as a filename, a Document or a Pixmap. Other
    parameters - where applicable - equal those of insert_pdf(), whose
    result is returned.
    '''
    if isinstance(infile, Pixmap):
        # pixmaps with more than 3 color components are converted to RGB
        if infile.colorspace.n > 3:
            infile = Pixmap(csRGB, infile)
        src = Document("png", infile.tobytes())
    elif isinstance(infile, Document):
        src = infile
    else:
        src = Document(infile)

    if not src:
        raise ValueError("bad infile parameter")
    if not src.is_pdf:
        # non-PDF sources are converted to a PDF in memory first
        src = Document("pdf", src.convert_to_pdf())

    return self.insert_pdf(
            src,
            from_page=from_page,
            to_page=to_page,
            start_at=start_at,
            rotate=rotate,
            links=links,
            annots=annots,
            show_progress=show_progress,
            final=final,
            )
def insert_pdf(
        self,
        docsrc,
        *,
        from_page=-1,
        to_page=-1,
        start_at=-1,
        rotate=-1,
        links=1,
        annots=1,
        widgets=1,
        join_duplicates=0,
        show_progress=0,
        final=1,
        _gmap=None,
        ):
    """Insert a page range from another PDF.

    Args:
        docsrc: PDF to copy from. Must be different object, but may be same file.
        from_page: (int) first source page to copy, 0-based, default 0.
        to_page: (int) last source page to copy, 0-based, default last page.
        start_at: (int) from_page will become this page number in target.
        rotate: (int) rotate copied pages, default -1 is no change.
        links: (int/bool) whether to also copy links.
        annots: (int/bool) whether to also copy annotations.
        widgets: (int/bool) whether to also copy form fields.
        join_duplicates: (int/bool) join or rename duplicate widget names.
        show_progress: (int) progress message interval, 0 is no messages.
        final: (bool) indicates last insertion from this source PDF.
        _gmap: internal use only

    Copy sequence reversed if from_page > to_page.

    Raises:
        ValueError: document closed or encrypted, or source and target
            are the same object.
        TypeError: source or target is not a PDF.
    """
    # Insert pages from a source PDF into this PDF.
    # For reconstructing the links (_do_links method), we must save the
    # insertion point (start_at) if it was specified as -1.
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    if self._graft_id == docsrc._graft_id:
        raise ValueError("source and target cannot be same object")
    outCount = self.page_count
    srcCount = docsrc.page_count

    # normalized local copies of the page numbers
    # (an earlier, immediately overwritten computation of `sa` was removed)
    fp = max(from_page, 0)  # -1 = first page
    fp = min(fp, srcCount - 1)  # but do not exceed last page

    tp = to_page
    if tp < 0:
        tp = srcCount - 1   # -1 = last page
    tp = min(tp, srcCount - 1)  # but do not exceed last page

    sa = start_at
    if sa < 0:
        sa = outCount   # -1 = behind last page
    sa = min(sa, outCount)  # but that is also the limit

    if len(docsrc) > show_progress > 0:
        inname = os.path.basename(docsrc.name)
        if not inname:
            inname = "memory PDF"
        outname = os.path.basename(self.name)
        if not outname:
            outname = "memory PDF"
        message("Inserting '%s' at '%s'" % (inname, outname))

    # retrieve / make a Graftmap to avoid duplicate objects
    isrt = docsrc._graft_id
    _gmap = self.Graftmaps.get(isrt, None)
    if _gmap is None:
        _gmap = Graftmap(self)
        self.Graftmaps[isrt] = _gmap

    if g_use_extra:
        # the extension receives the page numbers as given by the caller
        extra_FzDocument_insert_pdf(
                self.this,
                docsrc.this,
                from_page,
                to_page,
                start_at,
                rotate,
                links,
                annots,
                show_progress,
                final,
                _gmap,
                )
    else:
        pdfout = _as_pdf_document(self)
        pdfsrc = _as_pdf_document(docsrc)
        if not pdfout.m_internal or not pdfsrc.m_internal:
            raise TypeError( "source or target not a PDF")
        ENSURE_OPERATION(pdfout)
        JM_merge_range(pdfout, pdfsrc, fp, tp, sa, rotate, links, annots, show_progress, _gmap)

    self._reset_page_refs()
    if links:
        self._do_links(docsrc, from_page=fp, to_page=tp, start_at=sa)
    if widgets:
        self._do_widgets(docsrc, _gmap, from_page=fp, to_page=tp, start_at=sa, join_duplicates=join_duplicates)
    if final == 1:
        # last insertion from this source: release its graft map
        self.Graftmaps[isrt] = None
@property
def is_dirty(self):
    """Tell whether the PDF has unsaved changes.

    Returns False when the document has no underlying PDF object.
    """
    pdf_doc = _as_pdf_document(self, required=0)
    if pdf_doc.m_internal:
        return bool(mupdf.pdf_has_unsaved_changes(pdf_doc))
    return False
@property
def is_fast_webaccess(self):
    '''
    Tell whether the document is a linearized PDF.

    Returns False when the document has no underlying PDF object.
    '''
    pdf_doc = _as_pdf_document(self, required=0)
    if not pdf_doc.m_internal:
        return False    # gracefully handle non-PDF
    return mupdf.pdf_doc_was_linearized(pdf_doc)
@property
def is_form_pdf(self):
    """Either False or PDF field count.

    The count is the length of the Root/AcroForm/Fields array; False is
    returned for non-PDF documents, when that entry is not an array, or
    when the lookup raises.
    """
    pdf_doc = _as_pdf_document(self, required=0)
    if not pdf_doc.m_internal:
        return False
    try:
        fields = mupdf.pdf_dict_getl(
            mupdf.pdf_trailer(pdf_doc),
            mupdf.PDF_ENUM_NAME_Root,
            mupdf.PDF_ENUM_NAME_AcroForm,
            mupdf.PDF_ENUM_NAME_Fields,
        )
        n_fields = mupdf.pdf_array_len(fields) if mupdf.pdf_is_array(fields) else -1
    except Exception:
        if g_exceptions_verbose:    exception_info()
        return False
    return n_fields if n_fields >= 0 else False
@property
def is_pdf(self):
    """Check for PDF."""
    if isinstance(self.this, mupdf.PdfDocument):
        return True
    # Avoid calling mupdf.pdf_specifics because it will end up creating
    # a new PdfDocument which will call pdf_create_document(), which is ok
    # but a little unnecessary. Use the low-level variant instead.
    return bool(mupdf.ll_pdf_specifics(self.this.m_internal))
@property
def is_reflowable(self):
    """Check if document is layoutable.

    Raises:
        ValueError: if the document is closed.
    """
    if self.is_closed:
        raise ValueError("document closed")
    reflowable = mupdf.fz_is_document_reflowable(self)
    return bool(reflowable)
@property
def is_repaired(self):
    """Check whether PDF was repaired.

    Returns False when the document has no underlying PDF object.
    """
    pdf_doc = _as_pdf_document(self, required=0)
    if pdf_doc.m_internal:
        return bool(mupdf.pdf_was_repaired(pdf_doc))
    return False
def journal_can_do(self):
    """Show if undo and / or redo are possible.

    Returns:
        dict with boolean entries 'undo' and 'redo'.

    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf_doc = _as_pdf_document(self)
    can_undo = bool(mupdf.pdf_can_undo(pdf_doc))
    can_redo = bool(mupdf.pdf_can_redo(pdf_doc))
    return {'undo': can_undo, 'redo': can_redo}
def journal_enable(self):
    """Activate document journalling.

    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    mupdf.pdf_enable_journal(_as_pdf_document(self))
def journal_is_enabled(self):
    """Check if journalling is enabled.

    Returns:
        bool: True if the underlying PDF document has a journal attached.

    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf_doc = _as_pdf_document(self)
    # Coerce to a real bool: the bare `and` expression would otherwise leak
    # whichever raw operand decided the result (m_internal or journal).
    return bool(pdf_doc.m_internal and pdf_doc.m_internal.journal)
def journal_load(self, filename):
    """Load a journal from a file.

    Args:
        filename: a str is passed on as a file path; any other value is
            converted to a buffer and deserialised from that.

    Raises:
        ValueError: if the document is closed or encrypted.
        Whatever RAISEPY raises for JM_Exc_FileDataError if the document
        has no journal after loading.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf_doc = _as_pdf_document(self)
    if not isinstance(filename, str):
        buffer = JM_BufferFromBytes(filename)
        stream = mupdf.fz_open_buffer(buffer)
        mupdf.pdf_deserialise_journal(pdf_doc, stream)
    else:
        mupdf.pdf_load_journal(pdf_doc, filename)
    if not pdf_doc.m_internal.journal:
        RAISEPY( "Journal and document do not match", JM_Exc_FileDataError)
def journal_op_name(self, step):
    """Show operation name for given step.

    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf_doc = _as_pdf_document(self)
    return mupdf.pdf_undoredo_step(pdf_doc, step)
def journal_position(self):
    """Show journalling state.

    Returns:
        The two values reported by mupdf.pdf_undoredo_state(), as a tuple.

    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf_doc = _as_pdf_document(self)
    position, step_count = mupdf.pdf_undoredo_state(pdf_doc)
    return position, step_count
def journal_redo(self):
    """Move forward in the journal.

    Returns:
        True (always, unless an exception is raised).

    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    mupdf.pdf_redo(_as_pdf_document(self))
    return True
def journal_save(self, filename):
    """Save journal to a file.

    Args:
        filename: a str is used as a file path; any other value is wrapped
            via JM_new_output_fileptr() and written to.

    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf_doc = _as_pdf_document(self)
    if isinstance(filename, str):
        mupdf.pdf_save_journal(pdf_doc, filename)
        return
    output = JM_new_output_fileptr(filename)
    mupdf.pdf_write_journal(pdf_doc, output)
    output.fz_close_output()
def journal_start_op(self, name=None):
    """Begin a journalling operation.

    Args:
        name: operation name; when falsy an implicit operation is started.

    Raises:
        ValueError: if the document is closed or encrypted.
        RuntimeError: if the document has no journal.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf_doc = _as_pdf_document(self)
    if not pdf_doc.m_internal.journal:
        raise RuntimeError( "Journalling not enabled")
    if not name:
        mupdf.pdf_begin_implicit_operation(pdf_doc)
    else:
        mupdf.pdf_begin_operation(pdf_doc, name)
def journal_stop_op(self):
    """End a journalling operation.

    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    mupdf.pdf_end_operation(_as_pdf_document(self))
def journal_undo(self):
    """Move backwards in the journal.

    Returns:
        True (always, unless an exception is raised).

    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    mupdf.pdf_undo(_as_pdf_document(self))
    return True
@property
def language(self):
    """Document language.

    Returns None when there is no underlying PDF object or when the
    language is unset; otherwise the language converted to a string.
    """
    pdf_doc = _as_pdf_document(self, required=0)
    if not pdf_doc.m_internal:
        return None
    lang_code = mupdf.pdf_document_language(pdf_doc)
    if lang_code != mupdf.FZ_LANG_UNSET:
        return mupdf.fz_string_from_text_language2(lang_code)
    return None
@property
def last_location(self):
    """Id (chapter, page) of last page.

    Raises:
        ValueError: if the document is closed.
    """
    if self.is_closed:
        raise ValueError("document closed")
    location = mupdf.fz_last_page(self.this)
    return location.chapter, location.page
def layer_ui_configs(self):
    """Show OC visibility status modifiable by user.

    Returns:
        A list with one dict per layer-config UI entry, holding the keys
        "number", "text", "depth", "type", "on" and "locked".
    """
    pdf_doc = _as_pdf_document(self)
    ui_info = mupdf.PdfLayerConfigUi()
    # Type codes 1 and 2 have dedicated names; everything else is a label.
    type_names = {1: "checkbox", 2: "radiobox"}
    configs = []
    for number in range(mupdf.pdf_count_layer_config_ui( pdf_doc)):
        mupdf.pdf_layer_config_ui_info( pdf_doc, number, ui_info)
        configs.append(
            {
                "number": number,
                "text": ui_info.text,
                "depth": ui_info.depth,
                "type": type_names.get(ui_info.type, "label"),
                "on": ui_info.selected,
                "locked": ui_info.locked,
            }
        )
    return configs
def layout(self, rect=None, width=0, height=0, fontsize=11):
    """Re-layout a reflowable document.

    Args:
        rect: page rectangle; when it converts to a non-infinite rect its
            width and height take precedence over 'width' / 'height'.
        width: page width, used when 'rect' yields an infinite rect.
        height: page height, used when 'rect' yields an infinite rect.
        fontsize: font size handed to the layout call.

    Does nothing if the document is not reflowable.

    Raises:
        ValueError: if the document is closed or encrypted, or the
            resulting page size is not positive.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    fz_doc = self.this
    if not mupdf.fz_is_document_reflowable( fz_doc):
        return
    page_rect = JM_rect_from_py(rect)
    if mupdf.fz_is_infinite_rect(page_rect):
        page_w, page_h = width, height
    else:
        page_w = page_rect.x1 - page_rect.x0
        page_h = page_rect.y1 - page_rect.y0
    if page_w <= 0.0 or page_h <= 0.0:
        raise ValueError( "bad page size")
    mupdf.fz_layout_document( fz_doc, page_w, page_h, fontsize)
    # Page references and cached document data are refreshed after layout.
    self._reset_page_refs()
    self.init_doc()
def load_page(self, page_id):
    """Load a page.

    'page_id' is either a 0-based page number or a tuple (chapter, pno),
    with chapter number and page number within that chapter. None is
    treated as page 0; a negative int counts from the end.

    Raises:
        ValueError: if the document is closed or encrypted, or 'page_id'
            is not contained in the document.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    if page_id is None:
        page_id = 0
    if page_id not in self:
        raise ValueError("page not in document")
    if type(page_id) is int and page_id < 0:
        total = self.page_count
        while page_id < 0:
            page_id += total
    if isinstance(page_id, int):
        fz_page = mupdf.fz_load_page(self.this, page_id)
    else:
        chapter, pagenum = page_id
        fz_page = mupdf.fz_load_chapter_page(self.this, chapter, pagenum)
    new_page = Page(fz_page, self)
    new_page.thisown = True
    new_page.parent = self
    # Register the page with the document so it can be tracked / reset.
    self._page_refs[id(new_page)] = new_page
    new_page._annot_refs = weakref.WeakValueDictionary()
    new_page.number = page_id
    return new_page
def location_from_page_number(self, pno):
    """Convert pno to (chapter, page).

    Args:
        pno: 0-based page number; negative values count from the end.

    Returns:
        Tuple (chapter, page).

    Raises:
        ValueError: if the document is closed or 'pno' is out of range.
    """
    if self.is_closed:
        raise ValueError("document closed")
    fz_doc = self.this
    page_count = mupdf.fz_count_pages(fz_doc)
    # Wrap negative numbers with modulo; this gives the same result as
    # repeatedly adding page_count, but cannot loop forever when the
    # document has zero pages.
    if pno < 0 and page_count > 0:
        pno %= page_count
    if not 0 <= pno < page_count:
        raise ValueError( MSG_BAD_PAGENO)
    loc = mupdf.fz_location_from_page_number(fz_doc, pno)
    return loc.chapter, loc.page
def make_bookmark(self, loc):
    """Make a page pointer before layouting document.

    Args:
        loc: sequence unpacked into a mupdf.FzLocation.

    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    location = mupdf.FzLocation(*loc)
    return mupdf.ll_fz_make_bookmark2( self.this.m_internal, location.internal())
@property
def markinfo(self) -> dict:
    """Return the PDF MarkInfo value.

    Returns None if there is no PDF catalog, an empty dict if MarkInfo is
    missing or not a dictionary, else a dict of boolean flags.
    """
    catalog_xref = self.pdf_catalog()
    if catalog_xref == 0:
        return None
    rc = self.xref_get_key(catalog_xref, "MarkInfo")
    kind = rc[0]
    if kind == "null":
        return {}
    if kind == "xref":
        # indirect reference: fetch the referenced object's source
        target_xref = int(rc[1].split()[0])
        text = self.xref_object(target_xref, compressed=True)
    elif kind == "dict":
        text = rc[1]
    else:
        text = None
    if text is None or not text.startswith("<<") or not text.endswith(">>"):
        return {}
    flags = {"Marked": False, "UserProperties": False, "Suspects": False}
    # Strip the dictionary brackets and look at each "/Key value" item.
    for item in text[2:-2].split("/")[1:]:
        try:
            key, value = item.split()
        except Exception:
            if g_exceptions_verbose > 1:    exception_info()
            return flags
        if value == "true":
            flags[key] = True
    return flags
def move_page(self, pno: int, to: int =-1):
    """Move a page within a PDF document.

    Args:
        pno: source page number.
        to: put before this page, '-1' means after last page.

    Raises:
        ValueError: if the document is closed or a page number is invalid.
    """
    if self.is_closed:
        raise ValueError("document closed")
    page_count = len(self)
    if not (pno in range(page_count) and to in range(-1, page_count)):
        raise ValueError("bad page number(s)")
    if to == -1:
        # "after last page": target the last page and insert behind it
        return self._move_copy_page(pno, page_count - 1, 0, 0)
    return self._move_copy_page(pno, to, 1, 0)
@property
def name(self):
    """Return the stored document name (the '_name' attribute)."""
    return self._name
def need_appearances(self, value=None):
    """Get/set the NeedAppearances value.

    Args:
        value: None to query the current setting; any other value sets
            the key to true (truthy) or false (falsy).

    Returns:
        None if this is not a Form PDF, or if queried and the key is not
        a boolean. When querying, the stored boolean. When setting, the
        'value' argument as passed in.
    """
    if not self.is_form_pdf:
        return None
    pdf_doc = _as_pdf_document(self)
    appkey = "NeedAppearances"
    form = mupdf.pdf_dict_getp(
        mupdf.pdf_trailer(pdf_doc),
        "Root/AcroForm",
    )
    if value is None:
        # Pure query: must not modify the document. (Previously a query
        # fell into the "falsy" branch and wrote /NeedAppearances false.)
        current = mupdf.pdf_dict_gets(form, appkey)
        if mupdf.pdf_is_bool(current):
            return bool(mupdf.pdf_to_bool(current))
        return None
    new_state = mupdf.PDF_TRUE if value else mupdf.PDF_FALSE
    mupdf.pdf_dict_puts(form, appkey, new_state)
    return value
@property
def needs_pass(self):
    """Indicate password required.

    Raises:
        ValueError: if the document is closed.
    """
    if self.is_closed:
        raise ValueError("document closed")
    fz_doc = self.this
    if not isinstance(fz_doc, mupdf.FzDocument):
        # not an FzDocument wrapper: use its super() object instead
        fz_doc = fz_doc.super()
    return mupdf.fz_needs_password( fz_doc)
def next_location(self, page_id):
    """Get (chapter, page) of next page.

    Args:
        page_id: page number (taken as chapter 0) or (chapter, pno).

    Returns:
        Tuple (chapter, page), or the empty tuple if 'page_id' is the
        last location of the document.

    Raises:
        ValueError: if the document is closed or encrypted, or 'page_id'
            is not contained in the document.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    if type(page_id) is int:
        page_id = (0, page_id)
    if page_id not in self:
        raise ValueError("page id not in document")
    if tuple(page_id) == self.last_location:
        return ()
    fz_doc = _as_fz_document(self)
    chapter = page_id[ 0]
    if not isinstance(chapter, int):
        RAISEPY(MSG_BAD_PAGEID, PyExc_ValueError)
    pno = page_id[ 1]
    here = mupdf.fz_make_location(chapter, pno)
    following = mupdf.fz_next_page( fz_doc, here)
    return following.chapter, following.page
def page_annot_xrefs(self, n):
    """Return the annotation xref list of page number 'n'.

    Args:
        n: 0-based page number; negative values count from the end.

    Returns:
        The result of JM_get_annot_xref_list() for the page object (or of
        extra.page_annot_xrefs() when the 'extra' module is in use).

    Raises:
        ValueError: if 'n' is out of range (non-'extra' code path).
    """
    if g_use_extra:
        return extra.page_annot_xrefs( self.this, n)
    if isinstance(self.this, mupdf.PdfDocument):
        page_count = mupdf.pdf_count_pages(self.this)
        pdf_document = self.this
    else:
        page_count = mupdf.fz_count_pages(self.this)
        pdf_document = _as_pdf_document(self)
    # Modulo wraps negative numbers like repeated addition of page_count,
    # but cannot loop forever when the document has zero pages.
    if n < 0 and page_count > 0:
        n %= page_count
    # Valid page numbers are 0 .. page_count - 1; n == page_count must be
    # rejected as well (the previous check used '>' and let it through).
    if not 0 <= n < page_count:
        raise ValueError( MSG_BAD_PAGENO)
    page_obj = mupdf.pdf_lookup_page_obj(pdf_document, n)
    return JM_get_annot_xref_list(page_obj)
@property
def page_count(self):
    """Number of pages.

    Raises:
        ValueError: if the document is closed.
    """
    if self.is_closed:
        raise ValueError('document closed')
    if g_use_extra:
        return self.page_count2(self)
    # Pick the counting function that matches the wrapped document type.
    if isinstance( self.this, mupdf.FzDocument):
        counter = mupdf.fz_count_pages
    else:
        counter = mupdf.pdf_count_pages
    return counter( self.this)
def page_cropbox(self, pno):
    """Get CropBox of page number (without loading page).

    Args:
        pno: 0-based page number; negative values count from the end.

    Returns:
        Rect built from the page object's cropbox.

    Raises:
        ValueError: if the document is closed or 'pno' is out of range.
    """
    if self.is_closed:
        raise ValueError("document closed")
    page_count = mupdf.fz_count_pages( self.this)
    n = pno
    # Modulo wraps negative numbers like repeated addition of page_count,
    # but cannot loop forever when the document has zero pages.
    if n < 0 and page_count > 0:
        n %= page_count
    pdf_doc = _as_pdf_document(self)
    if not 0 <= n < page_count:
        raise ValueError( MSG_BAD_PAGENO)
    pageref = mupdf.pdf_lookup_page_obj( pdf_doc, n)
    cropbox = JM_cropbox(pageref)
    return Rect(JM_py_from_rect(cropbox))
def page_number_from_location(self, page_id):
    """Convert (chapter, pno) to page number.

    Args:
        page_id: (chapter, pno), or an int page number which is taken as
            a page of chapter 0; negative ints count from the end.

    Raises:
        ValueError: if 'page_id' is not contained in the document.
    """
    if type(page_id) is int:
        total = self.page_count
        if page_id < 0:
            if total <= 0:
                # Nothing to wrap around in an empty document; adding
                # zero repeatedly would never terminate.
                raise ValueError("page id not in document")
            page_id %= total
        page_id = (0, page_id)
    if page_id not in self:
        raise ValueError("page id not in document")
    chapter, pno = page_id
    loc = mupdf.fz_make_location( chapter, pno)
    return mupdf.fz_page_number_from_location( self.this, loc)
def page_xref(self, pno):
    """Get xref of page number.

    Args:
        pno: 0-based page number; negative values count from the end.

    Raises:
        ValueError: if the document is closed or 'pno' is out of range
            (non-'extra' code path).
    """
    if g_use_extra:
        return extra.page_xref( self.this, pno)
    if self.is_closed:
        raise ValueError("document closed")
    page_count = mupdf.fz_count_pages(self.this)
    n = pno
    # Modulo wraps negative numbers like repeated addition of page_count,
    # but cannot loop forever when the document has zero pages.
    if n < 0 and page_count > 0:
        n %= page_count
    pdf_doc = _as_pdf_document(self)
    if not 0 <= n < page_count:
        raise ValueError( MSG_BAD_PAGENO)
    return mupdf.pdf_to_num(mupdf.pdf_lookup_page_obj(pdf_doc, n))
@property
def pagelayout(self) -> str:
    """Return the PDF PageLayout value.

    Returns None if there is no PDF catalog. If the catalog entry is a
    name, it is returned without its leading character; in every other
    case "SinglePage" is returned.
    """
    catalog_xref = self.pdf_catalog()
    if catalog_xref == 0:
        return None
    rc = self.xref_get_key(catalog_xref, "PageLayout")
    if rc[0] == "name":
        return rc[1][1:]
    return "SinglePage"
@property
def pagemode(self) -> str:
    """Return the PDF PageMode value.

    Returns None if there is no PDF catalog. If the catalog entry is a
    name, it is returned without its leading character; in every other
    case "UseNone" is returned.
    """
    catalog_xref = self.pdf_catalog()
    if catalog_xref == 0:
        return None
    rc = self.xref_get_key(catalog_xref, "PageMode")
    if rc[0] == "name":
        return rc[1][1:]
    return "UseNone"
if sys.implementation.version < (3, 9):
    # Appending `[Page]` causes `TypeError: 'ABCMeta' object is not subscriptable`.
    _pages_ret = collections.abc.Iterable
else:
    _pages_ret = collections.abc.Iterable[Page]
def pages(self, start: OptInt =None, stop: OptInt =None, step: OptInt =None) -> _pages_ret:
    """Return a generator iterator over a page range.

    Arguments have the same meaning as for the range() built-in. A
    negative 'start' counts from the end; 'stop' is capped at the page
    count; a missing 'step' becomes -1 if start > stop, else 1.

    Raises:
        ValueError: for an invalid start page or a zero step (raised when
            iteration begins, as this is a generator).
    """
    total = self.page_count
    if not total:
        return
    # set the start value
    start = start or 0
    while start < 0:
        start += total
    if start not in range(total):
        raise ValueError("bad start page number")
    # set the stop value
    if stop is None or stop > total:
        stop = total
    # set the step value
    if step == 0:
        raise ValueError("arg 3 must not be zero")
    if step is None:
        step = -1 if start > stop else 1
    for pno in range(start, stop, step):
        yield self.load_page(pno)
def pdf_catalog(self):
    """Get xref of PDF catalog.

    Returns 0 when the document has no underlying PDF object.
    """
    pdf_doc = _as_pdf_document(self, required=0)
    if not pdf_doc.m_internal:
        return 0
    root = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf_doc), PDF_NAME('Root'))
    return mupdf.pdf_to_num(root)
def pdf_trailer(self, compressed=0, ascii=0):
    """Get PDF trailer as a string.

    Delegates to xref_object() with xref -1, passing 'compressed' and
    'ascii' through unchanged.
    """
    trailer_source = self.xref_object(-1, compressed=compressed, ascii=ascii)
    return trailer_source
@property
def permissions(self):
    """Document permissions.

    Returns 0 for an encrypted document. For a PDF the result of
    pdf_document_permissions() is returned; for other document types an
    equivalent bit field is assembled from fz_has_permission().
    """
    if self.is_encrypted:
        return 0
    fz_doc = self.this
    pdf_doc = mupdf.pdf_document_from_fz_document(fz_doc)
    # for PDF return result of standard function
    if pdf_doc.m_internal:
        return mupdf.pdf_document_permissions(pdf_doc)
    # otherwise simulate the PDF return value
    perm = 0xFFFFFFFC   # all permissions granted
    flag_pairs = (
        (mupdf.FZ_PERMISSION_PRINT, mupdf.PDF_PERM_PRINT),
        (mupdf.FZ_PERMISSION_EDIT, mupdf.PDF_PERM_MODIFY),
        (mupdf.FZ_PERMISSION_COPY, mupdf.PDF_PERM_COPY),
        (mupdf.FZ_PERMISSION_ANNOTATE, mupdf.PDF_PERM_ANNOTATE),
    )
    # now switch off where needed
    for fz_flag, pdf_bit in flag_pairs:
        if not mupdf.fz_has_permission(fz_doc, fz_flag):
            perm ^= pdf_bit
    return perm
def prev_location(self, page_id):
    """Get (chapter, page) of previous page.

    Args:
        page_id: page number (taken as chapter 0) or (chapter, pno).

    Returns:
        Tuple (chapter, page), or the empty tuple if 'page_id' is the
        first location (0, 0).

    Raises:
        ValueError: if the document is closed or encrypted, or 'page_id'
            is not contained in the document.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    if type(page_id) is int:
        page_id = (0, page_id)
    if page_id not in self:
        raise ValueError("page id not in document")
    # Normalise to a tuple before comparing (as next_location() does), so
    # that a list such as [0, 0] is also recognised as the first page.
    if tuple(page_id) == (0, 0):
        return ()
    chapter, pno = page_id
    loc = mupdf.fz_make_location(chapter, pno)
    prev_loc = mupdf.fz_previous_page(self.this, loc)
    return prev_loc.chapter, prev_loc.page
def reload_page(self, page: Page) -> Page:
    """Make a fresh copy of a page.

    The given page object is released and the page with the same number
    is loaded again; the annotation references of the old page are
    carried over to the new one.
    """
    pno = page.number  # remember which page to reload
    saved_annots = dict(page._annot_refs.items())  # keep annot references

    # `self.load_page()` below ends up in fz_load_chapter_page(), which
    # returns any matching page from the document's list of
    # non-ref-counted loaded pages instead of actually reloading it.
    #
    # We want to be sure that we really reloaded the fz_page, so we drop
    # our reference to the `fz_page*` first. This decrements .refs; if
    # .refs was 1 the `fz_page*` is guaranteed to be freed and removed
    # from the document's list, so the page we get back is a genuine
    # reload even if it happens to reuse the same block of memory.
    #
    # If .refs was greater than one, other references exist and we
    # require that they do not keep the page in the document's list. In
    # that case the original was not freed, so a newly loaded page cannot
    # reuse its memory and we can assert that the pointers differ.
    refs_old = page.this.m_internal.refs
    m_internal_old = page.this.m_internal_value()

    page.this = None
    page._erase()  # remove the page
    page = None
    TOOLS.store_shrink(100)
    page = self.load_page(pno)  # reload the page

    # copy annot refs over to the new dictionary
    for key, annot in saved_annots.items():
        page._annot_refs[key] = annot

    if refs_old != 1:
        # Check that the new `fz_page*` is different from the original.
        m_internal_new = page.this.m_internal_value()
        assert m_internal_new != m_internal_old, \
                f'{refs_old=} {m_internal_old=:#x} {m_internal_new=:#x}'
    return page
def resolve_link(self, uri=None, chapters=0):
    """Calculate internal link destination.

    Args:
        uri: (str) some Link.uri
        chapters: (bool) whether to use (chapter, page) format

    Returns:
        (page_id, x, y) where x, y are point coordinates on the page.
        page_id is either page number (if chapters=0), or (chapter, pno).
        If 'uri' is empty or cannot be resolved, page_id is -1 resp.
        (-1, -1) and x, y are 0.
    """
    # result used whenever no destination can be determined
    unresolved = ((-1, -1), 0, 0) if chapters else (-1, 0, 0)
    if not uri:
        return unresolved
    try:
        loc, xp, yp = mupdf.fz_resolve_link(self.this, uri)
    except Exception:
        if g_exceptions_verbose:    exception_info()
        return unresolved
    if chapters:
        return (loc.chapter, loc.page), xp, yp
    return mupdf.fz_page_number_from_location(self.this, loc), xp, yp
def rewrite_images(
    self,
    dpi_threshold=None,
    dpi_target=0,
    quality=0,
    lossy=True,
    lossless=True,
    bitonal=True,
    color=True,
    gray=True,
    set_to_gray=False,
    options=None,
):
    """Rewrite images in a PDF document.

    The typical use case is to reduce the size of the PDF by recompressing
    images. Default parameters will convert all images to JPEG where
    possible, using the specified resolutions and quality. Exclude
    undesired images by setting parameters to False.

    Args:
        dpi_threshold: look at images with a larger DPI only.
        dpi_target: change eligible images to this DPI.
        quality: Quality of the recompressed images (0-100).
        lossy: process lossy image types (e.g. JPEG).
        lossless: process lossless image types (e.g. PNG).
        bitonal: process black-and-white images (e.g. FAX)
        color: process colored images.
        gray: process gray images.
        set_to_gray: whether to change the PDF to gray at process start.
        options: (PdfImageRewriterOptions) Custom options for image
            rewriting (optional). Expert use only. If provided, other
            parameters are ignored, except set_to_gray.

    Raises:
        ValueError: if dpi_target is not smaller than dpi_threshold, or
            'options' carries attributes unknown to
            PdfImageRewriterOptions.
    """
    quality_str = str(quality)
    if not dpi_threshold:
        dpi_threshold = dpi_target = 0
    if dpi_target > 0 and dpi_target >= dpi_threshold:
        # f-string so that the offending values appear in the message
        raise ValueError(f"{dpi_target=} must be less than {dpi_threshold=}")
    template_opts = mupdf.PdfImageRewriterOptions()
    dir1 = set(dir(template_opts))  # for checking that only existing options are set
    if not options:
        opts = mupdf.PdfImageRewriterOptions()

        def configure(prefix, method):
            """Set the five '<prefix>_image_*' options for one image class."""
            setattr(opts, f"{prefix}_image_recompress_method", method)
            setattr(opts, f"{prefix}_image_subsample_method", mupdf.FZ_SUBSAMPLE_AVERAGE)
            setattr(opts, f"{prefix}_image_subsample_to", dpi_target)
            setattr(opts, f"{prefix}_image_subsample_threshold", dpi_threshold)
            setattr(opts, f"{prefix}_image_recompress_quality", quality_str)

        if bitonal:
            configure("bitonal", mupdf.FZ_RECOMPRESS_FAX)
        for wanted, colorspace in ((color, "color"), (gray, "gray")):
            if not wanted:
                continue
            if lossless:
                configure(f"{colorspace}_lossless", mupdf.FZ_RECOMPRESS_JPEG)
            if lossy:
                configure(f"{colorspace}_lossy", mupdf.FZ_RECOMPRESS_JPEG)
    else:
        opts = options
    dir2 = set(dir(opts))  # checking that only possible options were used
    invalid_options = dir2 - dir1
    if invalid_options:
        raise ValueError(f"Invalid options: {invalid_options}")
    if set_to_gray:
        self.recolor(1)
    pdf = _as_pdf_document(self)
    mupdf.pdf_rewrite_images(pdf, opts)
def recolor(self, components=1):
    """Change the color component count on all pages.

    Args:
        components: (int) desired color component count, one of 1, 3, 4.

    Invokes the same-named method for all pages.

    Raises:
        ValueError: if the document is not a PDF.
    """
    if not self.is_pdf:
        raise ValueError("is no PDF")
    for pno in range(self.page_count):
        page = self.load_page(pno)
        page.recolor(components)
def resolve_names(self):
    """Convert the PDF's destination names into a Python dict.

    The only parameter is the pymupdf.Document.
    All names found in the catalog under keys "/Dests" and "/Names/Dests" are
    being included.

    Returns:
        A dictionary with the following layout:
        - key: (str) the name
        - value: (dict) with the following layout:
            * "page":  target page number (0-based). If no page number found -1.
            * "to": (x, y) target point on page - currently in PDF coordinates,
                    i.e. point (0,0) is the bottom-left of the page.
            * "zoom": (float) the zoom factor
            * "dest": (str) only occurs if the target location on the page has
                    not been provided as "/XYZ" or if no page number was found.
        Examples:
        {'__bookmark_1': {'page': 0, 'to': (0.0, 541.0), 'zoom': 0.0},
         '__bookmark_2': {'page': 0, 'to': (0.0, 481.45), 'zoom': 0.0}}

        or

        '21154a7c20684ceb91f9c9adc3b677c40': {'page': -1, 'dest': '/XYZ 15.75 1486 0'}, ...
    """
    if hasattr(self, "_resolved_names"):  # do not execute multiple times!
        return self._resolved_names
    # reverse lookup table: page xref -> page number
    xref_to_pno = {self.page_xref(pno): pno for pno in range(self.page_count)}

    def obj_string(obj):
        """Return string version of a PDF object definition."""
        buffer = mupdf.fz_new_buffer(512)
        output = mupdf.FzOutput(buffer)
        mupdf.pdf_print_obj(output, obj, 1, 0)
        output.fz_close_output()
        return JM_UnicodeFromBuffer(buffer)

    def get_array(val):
        """Generate value of one item of the names dictionary."""
        result = {"page": -1, "dest": ""}  # value template
        if val.pdf_is_indirect():
            val = mupdf.pdf_resolve_indirect(val)
        if val.pdf_is_array():
            text = obj_string(val)
        elif val.pdf_is_dict():
            text = obj_string(mupdf.pdf_dict_gets(val, "D"))
        else:  # if all fails return the empty template
            return result
        # replace PDF "null" by zero, omit the square brackets
        text = text.replace("null", "0")[1:-1]
        # locate the first "/": the page spec precedes it
        slash = text.find("/")
        if slash < 1:  # this has no target page spec
            result["dest"] = text  # return the orig. string
            return result
        page_spec = text[:slash].strip()  # stuff before "/"
        text = text[slash:]  # stuff from "/" onwards
        result["dest"] = text
        # if we start with /XYZ: extract x, y, zoom
        # 1, 2 or 3 of these values may actually be supplied
        if text.startswith("/XYZ"):
            del result["dest"]  # don't return orig string in this case
            tokens = text.split()[1:4]  # up to 3 tokens after "/XYZ"
            tokens += ["0"] * (3 - len(tokens))  # fill up missing values
            x, y, zoom = map(float, tokens)
            result["to"] = (x, y)
            result["zoom"] = zoom
        # extract page number
        if page_spec.endswith("0 R"):  # page xref given?
            result["page"] = xref_to_pno.get(int(page_spec.split()[0]),-1)
        else:  # naked page number given
            result["page"] = int(page_spec)
        return result

    def fill_dict(dest_dict, pdf_dict):
        """Generate name resolution items for pdf_dict.

        This may be either "/Names/Dests" or just "/Dests"
        """
        # walk through all key-value pairs of the PDF dictionary
        for i in range(mupdf.pdf_dict_len(pdf_dict)):
            key = mupdf.pdf_dict_get_key(pdf_dict, i)
            val = mupdf.pdf_dict_get_val(pdf_dict, i)
            if not key.pdf_is_name():  # this should never happen!
                message(f"key {i} is no /Name")
                continue
            dict_key = key.pdf_to_name()
            if dict_key:
                dest_dict[dict_key] = get_array(val)  # store key/value in dict

    # access underlying PDF document of fz Document
    pdf = mupdf.pdf_document_from_fz_document(self)
    # access PDF catalog
    catalog = mupdf.pdf_dict_gets(mupdf.pdf_trailer(pdf), "Root")
    resolved = {}
    # make PDF_NAME(Dests)
    dests = mupdf.pdf_new_name("Dests")
    # extract destinations old style (PDF 1.1)
    old_dests = mupdf.pdf_dict_get(catalog, dests)
    if old_dests.pdf_is_dict():
        fill_dict(resolved, old_dests)
    # extract destinations new style (PDF 1.2+)
    tree = mupdf.pdf_load_name_tree(pdf, dests)
    if tree.pdf_is_dict():
        fill_dict(resolved, tree)
    self._resolved_names = resolved  # store result for reuse
    return resolved
def save(
    self,
    filename,
    garbage=0,
    clean=0,
    deflate=0,
    deflate_images=0,
    deflate_fonts=0,
    incremental=0,
    ascii=0,
    expand=0,
    linear=0,
    no_new_id=0,
    appearance=0,
    pretty=0,
    encryption=1,
    permissions=4095,
    owner_pw=None,
    user_pw=None,
    preserve_metadata=1,
    use_objstms=0,
    compression_effort=0,
):
    # From %pythonprepend save
    #
    """Save PDF to file, pathlib.Path or file pointer.

    'filename' may be a str, an object with an 'open' attribute (assumed
    pathlib.Path, converted via str()), an object with a 'name' attribute
    (assumed file object, its name is used), or an object with a 'seek'
    attribute (written to directly). The remaining arguments are copied
    into a mupdf.PdfWriteOptions object.

    Raises:
        ValueError: document closed or encrypted, unsupported 'filename',
            non-incremental save to the original file, 'linear' combined
            with 'use_objstms', zero pages, incremental save to anything
            but the original file, or a password longer than 40.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    if type(filename) is not str:
        if hasattr(filename, "open"):  # assume: pathlib.Path
            filename = str(filename)
        elif hasattr(filename, "name"):  # assume: file object
            filename = filename.name
        elif not hasattr(filename, "seek"):  # assume file object
            raise ValueError("filename must be str, Path or file object")
    if filename == self.name and not incremental:
        raise ValueError("save to original must be incremental")
    if linear and use_objstms:
        raise ValueError("'linear' and 'use_objstms' cannot both be requested")
    if self.page_count < 1:
        raise ValueError("cannot save with zero pages")
    if incremental and (self.name != filename or self.stream):
        raise ValueError("incremental needs original file")
    if any(pw and len(pw) > 40 for pw in (user_pw, owner_pw)):
        raise ValueError("password length must not exceed 40")

    pdf = _as_pdf_document(self)
    opts = mupdf.PdfWriteOptions()
    opts.do_incremental = incremental
    opts.do_ascii = ascii
    opts.do_compress = deflate
    opts.do_compress_images = deflate_images
    opts.do_compress_fonts = deflate_fonts
    opts.do_decompress = expand
    opts.do_garbage = garbage
    opts.do_pretty = pretty
    opts.do_linear = linear
    # 'clean' drives both the clean and the sanitize option
    opts.do_clean = clean
    opts.do_sanitize = clean
    opts.dont_regenerate_id = no_new_id
    opts.do_appearance = appearance
    opts.do_encrypt = encryption
    opts.permissions = permissions
    # the owner password falls back to the user password if not given
    owner_value = owner_pw if owner_pw is not None else user_pw
    if owner_value is not None:
        opts.opwd_utf8_set_value(owner_value)
    if user_pw is not None:
        opts.upwd_utf8_set_value(user_pw)
    opts.do_preserve_metadata = preserve_metadata
    opts.do_use_objstms = use_objstms
    opts.compression_effort = compression_effort

    pdf.m_internal.resynth_required = 0
    JM_embedded_clean(pdf)
    if no_new_id == 0:
        JM_ensure_identity(pdf)
    if isinstance(filename, str):
        mupdf.pdf_save_document(pdf, filename, opts)
    else:
        out = JM_new_output_fileptr(filename)
        mupdf.pdf_write_document(pdf, out, opts)
        out.fz_close_output()
def save_snapshot(self, filename):
    """Save a file snapshot suitable for journalling.

    'filename' may be a str, an object with an 'open' attribute (assumed
    pathlib.Path) or an object with a 'name' attribute (assumed file
    object, its name is used).

    Raises:
        ValueError: document closed, unsupported 'filename', or the
            target equals the document's own name.
    """
    if self.is_closed:
        raise ValueError("doc is closed")
    if type(filename) is not str:
        if hasattr(filename, "open"):  # assume: pathlib.Path
            filename = str(filename)
        elif hasattr(filename, "name"):  # assume: file object
            filename = filename.name
        else:
            raise ValueError("filename must be str, Path or file object")
    if filename == self.name:
        raise ValueError("cannot snapshot to original")
    mupdf.pdf_save_snapshot(_as_pdf_document(self), filename)
def saveIncr(self):
    """Save PDF incrementally.

    Calls save() on the document's own name with incremental=True and
    encryption=mupdf.PDF_ENCRYPT_KEEP.
    """
    target = self.name
    return self.save(target, incremental=True, encryption=mupdf.PDF_ENCRYPT_KEEP)
def select(self, pyliste):
    """Build sub-pdf with page numbers in the list.

    Args:
        pyliste: sequence of page numbers to keep, each in range(len(self)).

    Raises:
        ValueError: document closed or encrypted, not a PDF, 'pyliste' is
            not a sequence, is empty, or contains a bad page number.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    if not self.is_pdf:
        raise ValueError("is no PDF")
    if not hasattr(pyliste, "__getitem__"):
        raise ValueError("sequence required")
    valid_range = range(len(self))
    # checking smallest and largest entry is enough to validate all
    if not (
        len(pyliste) != 0
        and min(pyliste) in valid_range
        and max(pyliste) in valid_range
    ):
        raise ValueError("bad page number(s)")
    # get underlying pdf document,
    pdf = _as_pdf_document(self)
    # create page sub-pdf via pdf_rearrange_pages2().
    #
    if mupdf_version_tuple < (1, 25, 3):
        mupdf.pdf_rearrange_pages2(pdf, pyliste)
    else:
        # We use PDF_CLEAN_STRUCTURE_KEEP otherwise we lose structure tree
        # which, for example, breaks test_3705.
        mupdf.pdf_rearrange_pages2(pdf, pyliste, mupdf.PDF_CLEAN_STRUCTURE_KEEP)
    # remove any existing pages with their kids
    self._reset_page_refs()
def set_language(self, language=None):
    """Set the document language.

    Args:
        language: language string; a falsy value unsets the language.

    Returns:
        True.
    """
    pdf_doc = _as_pdf_document(self)
    if language:
        lang = mupdf.fz_text_language_from_string(language)
    else:
        lang = mupdf.FZ_LANG_UNSET
    mupdf.pdf_set_document_language(pdf_doc, lang)
    return True
def set_layer(self, config, basestate=None, on=None, off=None, rbgroups=None, locked=None):
    """Set the PDF keys /ON, /OFF, /RBGroups of an OC layer.

    Args:
        config: -1 selects the /D entry of /OCProperties, any other value
            is used as index into its /Configs array.
        basestate: optional; upper-cased and accepted if it is one of
            "ON", "OFF" or "UNCHANGED" (stored as "Unchanged").
        on, off, locked: optional list / tuple whose members must all be
            keys of `self.get_ocgs()`.
        rbgroups: optional list / tuple of lists / tuples with the same
            membership rule.

    Raises:
        ValueError: on a closed document, a document without optional
            content, any argument failing the checks above, or an
            unresolvable `config`.

    Returns None; silently does nothing if the catalog has no
    /OCProperties entry.
    """
    if self.is_closed:
        raise ValueError("document closed")
    known_ocgs = set(self.get_ocgs().keys())
    if not known_ocgs:
        raise ValueError("document has no optional content")

    if on:
        if type(on) not in (list, tuple):
            raise ValueError("bad type: 'on'")
        unknown = set(on) - known_ocgs
        if unknown:
            raise ValueError("bad OCGs in 'on': %s" % unknown)

    if off:
        if type(off) not in (list, tuple):
            raise ValueError("bad type: 'off'")
        unknown = set(off) - known_ocgs
        if unknown:
            raise ValueError("bad OCGs in 'off': %s" % unknown)

    if locked:
        if type(locked) not in (list, tuple):
            raise ValueError("bad type: 'locked'")
        unknown = set(locked) - known_ocgs
        if unknown:
            raise ValueError("bad OCGs in 'locked': %s" % unknown)

    if rbgroups:
        if type(rbgroups) not in (list, tuple):
            raise ValueError("bad type: 'rbgroups'")
        for group in rbgroups:
            if type(group) not in (list, tuple):
                raise ValueError("bad RBGroup '%s'" % group)
            unknown = set(group) - known_ocgs
            if unknown:
                raise ValueError("bad OCGs in RBGroup: %s" % unknown)

    if basestate:
        basestate = str(basestate).upper()
        if basestate == "UNCHANGED":
            basestate = "Unchanged"
        if basestate not in ("ON", "OFF", "Unchanged"):
            raise ValueError("bad 'basestate'")

    pdf = _as_pdf_document(self)
    ocp = mupdf.pdf_dict_getl(
        mupdf.pdf_trailer(pdf),
        PDF_NAME('Root'),
        PDF_NAME('OCProperties'),
    )
    if not ocp.m_internal:
        return

    if config == -1:
        target = mupdf.pdf_dict_get(ocp, PDF_NAME('D'))
    else:
        configs = mupdf.pdf_dict_get(ocp, PDF_NAME('Configs'))
        target = mupdf.pdf_array_get(configs, config)
    if not target.m_internal:
        raise ValueError(MSG_BAD_OC_CONFIG)

    JM_set_ocg_arrays(target, basestate, on, off, rbgroups, locked)
    mupdf.ll_pdf_read_ocg(pdf.m_internal)
def set_layer_ui_config(self, number, action=0):
    """Set / unset OC intent configuration.

    Args:
        number: sequence number of the UI configuration, or its name. A
            str is looked up in the "text" field of `layer_ui_configs()`
            and replaced by the "number" of the first match.
        action: 1 toggles, 2 deselects, any other value selects.

    Raises:
        ValueError: if a name is given and no UI configuration carries it.
    """
    # The user might have given the name instead of the sequence number,
    # so select by that name and continue with the corresponding number.
    if isinstance(number, str):
        matches = [
            cfg["number"]
            for cfg in self.layer_ui_configs()
            if cfg["text"] == number
        ]
        if not matches:
            raise ValueError(f"bad OCG '{number}'.")
        number = matches[0]  # this is the number for the name

    pdf = _as_pdf_document(self)
    if action == 1:
        apply = mupdf.pdf_toggle_layer_config_ui
    elif action == 2:
        apply = mupdf.pdf_deselect_layer_config_ui
    else:
        apply = mupdf.pdf_select_layer_config_ui
    apply(pdf, number)
def set_markinfo(self, markinfo: dict) -> bool:
    """Set the PDF MarkInfo values.

    Args:
        markinfo: dict whose keys are a subset of "Marked",
            "UserProperties" and "Suspects". Keys left out are written as
            false. Each value must stringify (case-insensitively) to
            "true" or "false".

    Returns:
        False if `markinfo` is empty or not a dict, True after the catalog
        key "MarkInfo" has been written via `xref_set_key()`.

    Raises:
        ValueError: if `pdf_catalog()` returns 0, on unknown keys, or on a
            value that is neither true nor false.
    """
    xref = self.pdf_catalog()
    if xref == 0:
        raise ValueError("not a PDF")
    if not markinfo or not isinstance(markinfo, dict):
        return False

    settings = {"Marked": False, "UserProperties": False, "Suspects": False}
    unknown = set(markinfo.keys()).difference(settings.keys())
    if unknown:
        raise ValueError(f"bad MarkInfo key(s): {unknown}")

    settings.update(markinfo)
    entries = []
    for key, raw in settings.items():
        value = str(raw).lower()
        if value not in ("true", "false"):
            raise ValueError(f"bad key value '{key}': '{value}'")
        entries.append(f"/{key} {value}")

    self.xref_set_key(xref, "MarkInfo", "<<" + "".join(entries) + ">>")
    return True
def set_pagelayout(self, pagelayout: str):
    """Set the PDF PageLayout value.

    Args:
        pagelayout: one of "SinglePage", "OneColumn", "TwoColumnLeft",
            "TwoColumnRight", "TwoPageLeft", "TwoPageRight"; matched
            case-insensitively, one leading "/" is ignored.

    Returns:
        True after the catalog key "PageLayout" has been written.

    Raises:
        ValueError: if `pdf_catalog()` returns 0, or the value is empty or
            not one of the names above.
    """
    valid = ("SinglePage", "OneColumn", "TwoColumnLeft", "TwoColumnRight", "TwoPageLeft", "TwoPageRight")
    xref = self.pdf_catalog()
    if xref == 0:
        raise ValueError("not a PDF")
    if not pagelayout:
        raise ValueError("bad PageLayout value")

    wanted = pagelayout[1:] if pagelayout[0] == "/" else pagelayout
    wanted = wanted.lower()
    # The value is written with the canonical spelling, not the caller's.
    canonical = next((name for name in valid if name.lower() == wanted), None)
    if canonical is None:
        raise ValueError("bad PageLayout value")
    self.xref_set_key(xref, "PageLayout", f"/{canonical}")
    return True
def set_pagemode(self, pagemode: str):
    """Set the PDF PageMode value.

    Args:
        pagemode: one of "UseNone", "UseOutlines", "UseThumbs",
            "FullScreen", "UseOC", "UseAttachments"; matched
            case-insensitively, one leading "/" is ignored.

    Returns:
        True after the catalog key "PageMode" has been written.

    Raises:
        ValueError: if `pdf_catalog()` returns 0, or the value is empty or
            not one of the names above.
    """
    valid = ("UseNone", "UseOutlines", "UseThumbs", "FullScreen", "UseOC", "UseAttachments")
    xref = self.pdf_catalog()
    if xref == 0:
        raise ValueError("not a PDF")
    if not pagemode:
        raise ValueError("bad PageMode value")

    wanted = pagemode[1:] if pagemode[0] == "/" else pagemode
    wanted = wanted.lower()
    # The value is written with the canonical spelling, not the caller's.
    canonical = next((name for name in valid if name.lower() == wanted), None)
    if canonical is None:
        raise ValueError("bad PageMode value")
    self.xref_set_key(xref, "PageMode", f"/{canonical}")
    return True
def set_xml_metadata(self, metadata):
    """Store XML document level metadata.

    Args:
        metadata: the XML text; it is UTF-8 encoded into a new buffer.

    If the catalog already has a /Metadata entry its stream is replaced,
    otherwise a new stream object is added, typed /Metadata with subtype
    /XML, and linked from the catalog.

    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    root = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
    if not root.m_internal:
        RAISEPY(MSG_BAD_PDFROOT, JM_Exc_FileDataError)

    payload = mupdf.fz_new_buffer_from_copied_data(metadata.encode('utf-8'))
    xml = mupdf.pdf_dict_get(root, PDF_NAME('Metadata'))
    if xml.m_internal:
        JM_update_stream(pdf, xml, payload, 0)
        return

    xml = mupdf.pdf_add_stream(pdf, payload, mupdf.PdfObj(), 0)
    mupdf.pdf_dict_put(xml, PDF_NAME('Type'), PDF_NAME('Metadata'))
    mupdf.pdf_dict_put(xml, PDF_NAME('Subtype'), PDF_NAME('XML'))
    mupdf.pdf_dict_put(root, PDF_NAME('Metadata'), xml)
def switch_layer(self, config, as_default=0):
    """Activate an OC layer.

    Args:
        config: index of the layer configuration to select. Negative
            values are a no-op.
        as_default: if true, the selected configuration is additionally
            stored as the default and the OCG data is re-read.

    Raises:
        ValueError: if the PDF has no (or an empty) /Configs array and
            `config` is 1 or greater.
    """
    pdf = _as_pdf_document(self)
    configs = mupdf.pdf_dict_getl(
        mupdf.pdf_trailer(pdf),
        PDF_NAME('Root'),
        PDF_NAME('OCProperties'),
        PDF_NAME('Configs'),
    )
    has_configs = mupdf.pdf_is_array(configs) and mupdf.pdf_array_len(configs)
    if not has_configs:
        # Without alternate configurations only values below 1 are accepted.
        if config < 1:
            return
        raise ValueError(MSG_BAD_OC_LAYER)
    if config < 0:
        return

    mupdf.pdf_select_layer_config(pdf, config)
    if as_default:
        mupdf.pdf_set_layer_config_as_default(pdf)
        mupdf.ll_pdf_read_ocg(pdf.m_internal)
def update_object(self, xref, text, page=None):
    """Replace object definition source.

    Args:
        xref: number of the object to replace; must lie in
            ``1 .. xref_length - 1``.
        text: the new object source, parsed by `JM_pdf_obj_from_str()`.
        page: optional page; if given, its links are refreshed afterwards.

    Raises:
        ValueError: if the document is closed or encrypted, or `xref` is
            out of range.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    xreflen = mupdf.pdf_xref_len(pdf)
    if not _INRANGE(xref, 1, xreflen-1):
        # Raise the same way as the other xref-taking methods of this
        # class. The previous RAISEPY() call passed a message string where
        # its other callers pass an exception class.
        raise ValueError(MSG_BAD_XREF)
    ENSURE_OPERATION(pdf)
    # create new object with passed-in string
    new_obj = JM_pdf_obj_from_str(pdf, text)
    mupdf.pdf_update_object(pdf, xref, new_obj)
    if page:
        JM_refresh_links(_as_pdf_page(page))
def update_stream(self, xref=0, stream=None, new=1, compress=1):
    """Replace xref stream part.

    Args:
        xref: number of the object whose stream is replaced; must lie in
            ``1 .. xref_length - 1`` and refer to a dictionary object.
        stream: the new stream content, converted by `JM_BufferFromBytes()`.
        new: accepted for interface compatibility; not used by this method.
        compress: passed through to `JM_update_stream()`.

    Raises:
        ValueError: if the document is closed or encrypted, `xref` is out
            of range, or the object is not a dictionary.
        TypeError: if `stream` cannot be converted to a buffer.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    xreflen = mupdf.pdf_xref_len(pdf)
    # The highest valid xref is xreflen - 1, the same bound the other xref
    # methods of this class check; `xref == xreflen` used to slip through.
    if xref < 1 or xref > xreflen - 1:
        raise ValueError(MSG_BAD_XREF)
    # get the object
    obj = mupdf.pdf_new_indirect(pdf, xref, 0)
    if not mupdf.pdf_is_dict(obj):
        raise ValueError(MSG_IS_NO_DICT)
    res = JM_BufferFromBytes(stream)
    if not res.m_internal:
        raise TypeError(MSG_BAD_BUFFER)
    JM_update_stream(pdf, obj, res, compress)
    pdf.dirty = 1
@property
def version_count(self):
    '''
    Count versions of PDF document.

    Returns 0 when the document has no underlying PDF object.
    '''
    pdf = _as_pdf_document(self, required=0)
    if not pdf.m_internal:
        return 0
    return mupdf.pdf_count_versions(pdf)
def write(
    self,
    garbage=False,
    clean=False,
    deflate=False,
    deflate_images=False,
    deflate_fonts=False,
    incremental=False,
    ascii=False,
    expand=False,
    linear=False,
    no_new_id=False,
    appearance=False,
    pretty=False,
    encryption=1,
    permissions=4095,
    owner_pw=None,
    user_pw=None,
    preserve_metadata=1,
    use_objstms=0,
    compression_effort=0,
):
    """Save the document to memory and return it as a bytes object.

    All parameters are forwarded unchanged, by keyword, to `save()`, which
    writes into an in-memory `BytesIO` whose content is returned.
    """
    from io import BytesIO

    save_options = dict(
        garbage=garbage,
        clean=clean,
        no_new_id=no_new_id,
        appearance=appearance,
        deflate=deflate,
        deflate_images=deflate_images,
        deflate_fonts=deflate_fonts,
        incremental=incremental,
        ascii=ascii,
        expand=expand,
        linear=linear,
        pretty=pretty,
        encryption=encryption,
        permissions=permissions,
        owner_pw=owner_pw,
        user_pw=user_pw,
        preserve_metadata=preserve_metadata,
        use_objstms=use_objstms,
        compression_effort=compression_effort,
    )
    target = BytesIO()
    self.save(target, **save_options)
    return target.getvalue()
@property
def xref(self):
    """PDF xref number of page.

    Validates the parent via `CheckParent()`, then asks `self.parent` for
    `page_xref(self.number)`.
    """
    # NOTE(review): reads `self.parent` and `self.number`, which look like
    # page attributes - confirm this property is meant to live here.
    CheckParent(self)
    owner = self.parent
    return owner.page_xref(self.number)
def xref_get_key(self, xref, key):
    """Get PDF dict key value of object at 'xref'.

    Args:
        xref: object number in ``1 .. xref_length - 1``, or -1 for the
            PDF trailer.
        key: key (path) handed to `mupdf.pdf_dict_getp()`.

    Returns:
        A tuple ``(type, value)`` of two strings. `type` is one of "xref",
        "array", "dict", "int", "float", "null", "bool", "name", "string"
        or "unknown". ``("null", "null")`` is returned when the object or
        the key does not exist.

    Raises:
        ValueError: if `xref` is out of range.
    """
    pdf = _as_pdf_document(self)
    xreflen = mupdf.pdf_xref_len(pdf)
    if not _INRANGE(xref, 1, xreflen-1) and xref != -1:
        raise ValueError(MSG_BAD_XREF)
    obj = mupdf.pdf_load_object(pdf, xref) if xref > 0 else mupdf.pdf_trailer(pdf)
    if not obj.m_internal:
        return ("null", "null")
    subobj = mupdf.pdf_dict_getp(obj, key)
    if not subobj.m_internal:
        return ("null", "null")

    # `rendered` stays None for the kinds that are serialized generically
    # at the end (array, dict, float, unknown).
    rendered = None
    if mupdf.pdf_is_indirect(subobj):
        kind = "xref"
        rendered = "%i 0 R" % mupdf.pdf_to_num(subobj)
    elif mupdf.pdf_is_array(subobj):
        kind = "array"
    elif mupdf.pdf_is_dict(subobj):
        kind = "dict"
    elif mupdf.pdf_is_int(subobj):
        kind = "int"
        rendered = "%i" % mupdf.pdf_to_int(subobj)
    elif mupdf.pdf_is_real(subobj):
        kind = "float"
    elif mupdf.pdf_is_null(subobj):
        kind = "null"
        rendered = "null"
    elif mupdf.pdf_is_bool(subobj):
        kind = "bool"
        rendered = "true" if mupdf.pdf_to_bool(subobj) else "false"
    elif mupdf.pdf_is_name(subobj):
        kind = "name"
        rendered = "/%s" % mupdf.pdf_to_name(subobj)
    elif mupdf.pdf_is_string(subobj):
        kind = "string"
        rendered = JM_UnicodeFromStr(mupdf.pdf_to_text_string(subobj))
    else:
        kind = "unknown"

    if rendered is None:
        rendered = JM_UnicodeFromBuffer(JM_object_to_buffer(subobj, 1, 0))
    return (kind, rendered)
def xref_get_keys(self, xref):
    """Get the keys of PDF dict object at 'xref'. Use -1 for the PDF trailer.

    Returns:
        A list with the key names, in the order of the dictionary; empty
        if the dictionary length is 0.

    Raises:
        ValueError: if `xref` is neither -1 nor in ``1 .. xref_length - 1``.
    """
    pdf = _as_pdf_document(self)
    xreflen = mupdf.pdf_xref_len(pdf)
    if not _INRANGE(xref, 1, xreflen-1) and xref != -1:
        raise ValueError(MSG_BAD_XREF)
    obj = mupdf.pdf_load_object(pdf, xref) if xref > 0 else mupdf.pdf_trailer(pdf)
    count = mupdf.pdf_dict_len(obj)
    return [
        mupdf.pdf_to_name(mupdf.pdf_dict_get_key(obj, index))
        for index in range(count)
    ]
def xref_is_font(self, xref):
    """Check if xref is a font object.

    True exactly when the object's "Type" key has the value "/Font".

    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    _, value = self.xref_get_key(xref, "Type")
    return value == "/Font"
def xref_is_image(self, xref):
    """Check if xref is an image object.

    True exactly when the object's "Subtype" key has the value "/Image".

    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    _, value = self.xref_get_key(xref, "Subtype")
    return value == "/Image"
def xref_is_stream(self, xref=0):
    """Check if xref is a stream object.

    Returns False for a document without an underlying PDF object.
    """
    pdf = _as_pdf_document(self, required=0)
    if pdf.m_internal:
        return bool(mupdf.pdf_obj_num_is_stream(pdf, xref))
    return False    # not a PDF
def xref_is_xobject(self, xref):
    """Check if xref is a form xobject.

    True exactly when the object's "Subtype" key has the value "/Form".

    Raises:
        ValueError: if the document is closed or encrypted.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    _, value = self.xref_get_key(xref, "Subtype")
    return value == "/Form"
def xref_length(self):
    """Get length of xref table.

    Returns 0 for a document without an underlying PDF object.
    """
    pdf = _as_pdf_document(self, required=0)
    if not pdf.m_internal:
        return 0
    return mupdf.pdf_xref_len(pdf)
def xref_object(self, xref, compressed=0, ascii=0):
    """Get xref object source as a string.

    Args:
        xref: object number in ``1 .. xref_length - 1``, or -1 for the
            PDF trailer.
        compressed, ascii: passed through to the serializer.

    Raises:
        ValueError: if the document is closed or (on the pure-Python
            path) `xref` is out of range.
    """
    if self.is_closed:
        raise ValueError("document closed")
    if g_use_extra:
        # Optimized implementation from the `extra` module.
        return extra.xref_object(self.this, xref, compressed, ascii)

    pdf = _as_pdf_document(self)
    xreflen = mupdf.pdf_xref_len(pdf)
    if not _INRANGE(xref, 1, xreflen-1) and xref != -1:
        raise ValueError(MSG_BAD_XREF)
    obj = mupdf.pdf_load_object(pdf, xref) if xref > 0 else mupdf.pdf_trailer(pdf)
    buffer_ = JM_object_to_buffer(mupdf.pdf_resolve_indirect(obj), compressed, ascii)
    return JM_EscapeStrFromBuffer(buffer_)
def xref_set_key(self, xref, key, value):
    """Set the value of a PDF dictionary key.

    Args:
        xref: object number in ``1 .. xref_length - 1``, or -1 for the
            PDF trailer.
        key: non-empty str; of the characters in INVALID_NAME_CHARS it may
            contain only "/".
        value: non-empty str; if it starts with "/", the remainder must
            not contain any character of INVALID_NAME_CHARS.

    Raises:
        ValueError: if the document is closed, `key` or `value` fail the
            checks above, or `xref` is out of range.

    Returns None. Nothing is updated if `JM_set_object_value()` yields no
    object.
    """
    if self.is_closed:
        raise ValueError("document closed")

    if not key or not isinstance(key, str):
        raise ValueError("bad 'key'")
    if INVALID_NAME_CHARS.intersection(key) not in (set(), {"/"}):
        raise ValueError("bad 'key'")

    if not isinstance(value, str) or not value:
        raise ValueError("bad 'value'")
    if value[0] == "/" and INVALID_NAME_CHARS.intersection(value[1:]):
        raise ValueError("bad 'value'")

    pdf = _as_pdf_document(self)
    xreflen = mupdf.pdf_xref_len(pdf)
    if not _INRANGE(xref, 1, xreflen-1) and xref != -1:
        raise ValueError(MSG_BAD_XREF)

    is_trailer = xref == -1
    obj = mupdf.pdf_trailer(pdf) if is_trailer else mupdf.pdf_load_object(pdf, xref)
    new_obj = JM_set_object_value(obj, key, value)
    if not new_obj.m_internal:
        return  # did not work: skip update

    if not is_trailer:
        mupdf.pdf_update_object(pdf, xref, new_obj)
        return
    # The trailer is not replaced as a whole: copy every entry of the new
    # dictionary into it.
    for index in range(mupdf.pdf_dict_len(new_obj)):
        mupdf.pdf_dict_put(
            obj,
            mupdf.pdf_dict_get_key(new_obj, index),
            mupdf.pdf_dict_get_val(new_obj, index),
        )
def xref_stream(self, xref):
    """Get decompressed xref stream.

    Returns:
        The stream content converted by `JM_BinFromBuffer()`, or None if
        the object is not a stream.

    Raises:
        ValueError: if the document is closed or encrypted, or `xref` is
            neither -1 nor in ``1 .. xref_length - 1``.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    xreflen = mupdf.pdf_xref_len(pdf)
    if not _INRANGE(xref, 1, xreflen-1) and xref != -1:
        raise ValueError(MSG_BAD_XREF)
    obj = mupdf.pdf_new_indirect(pdf, xref, 0) if xref >= 0 else mupdf.pdf_trailer(pdf)
    if not mupdf.pdf_is_stream(obj):
        return None
    return JM_BinFromBuffer(mupdf.pdf_load_stream_number(pdf, xref))
def xref_stream_raw(self, xref):
    """Get xref stream without decompression.

    Returns:
        The raw stream content converted by `JM_BinFromBuffer()`, or None
        if the object is not a stream.

    Raises:
        ValueError: if the document is closed or encrypted, or `xref` is
            neither -1 nor in ``1 .. xref_length - 1``.
    """
    if self.is_closed or self.is_encrypted:
        raise ValueError("document closed or encrypted")
    pdf = _as_pdf_document(self)
    xreflen = mupdf.pdf_xref_len(pdf)
    if not _INRANGE(xref, 1, xreflen-1) and xref != -1:
        raise ValueError(MSG_BAD_XREF)
    obj = mupdf.pdf_new_indirect(pdf, xref, 0) if xref >= 0 else mupdf.pdf_trailer(pdf)
    if not mupdf.pdf_is_stream(obj):
        return None
    return JM_BinFromBuffer(mupdf.pdf_load_raw_stream_number(pdf, xref))
def xref_xml_metadata(self):
    """Get xref of document XML metadata.

    Returns:
        The object number of the catalog's /Metadata entry, or 0 if the
        catalog has none.
    """
    pdf = _as_pdf_document(self)
    root = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
    if not root.m_internal:
        RAISEPY(MSG_BAD_PDFROOT, JM_Exc_FileDataError)
    xml = mupdf.pdf_dict_get(root, PDF_NAME('Metadata'))
    return mupdf.pdf_to_num(xml) if xml.m_internal else 0
# Named slots for the core attributes; listing '__dict__' as well keeps
# instances open for arbitrary additional attributes.
__slots__ = ('this', 'page_count2', 'this_is_pdf', '__dict__')
# Read-only view of the private `_outline` attribute.
outline = property(lambda self: self._outline)
# Alternative names for methods defined above.
tobytes = write
is_stream = xref_is_stream
# Alias: calling `open(...)` constructs a `Document`.
# NOTE(review): if this is at module level it shadows the builtin `open()`
# for the rest of this module - confirm no later code relies on the builtin.
open = Document
class DocumentWriter:
    """Wrapper around `mupdf.FzDocumentWriter` producing PDF output.

    Usable as a context manager; leaving the `with` block calls `close()`.
    """

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()

    def __init__(self, path, options=''):
        """Create a writer for `path`.

        Args:
            path: a str, an object with an ``absolute`` attribute
                (converted with `str()`), an object with a ``name``
                attribute (its name is used), or anything else, which is
                handed to `JM_new_output_fileptr()`.
            options: option string passed to the MuPDF writer.
        """
        if not isinstance(path, str):
            if hasattr(path, 'absolute'):
                path = str(path)
            elif hasattr(path, 'name'):
                path = path.name

        if isinstance(path, str):
            self.this = mupdf.FzDocumentWriter(path, options, mupdf.FzDocumentWriter.PathType_PDF)
            return

        # Need to keep the Python JM_new_output_fileptr_Output instance
        # alive for the lifetime of this DocumentWriter, otherwise calls
        # to virtual methods implemented in Python fail. So we make it a
        # member of this DocumentWriter.
        #
        # Unrelated to this, mupdf.FzDocumentWriter will set
        # self._out.m_internal to null because ownership is passed in.
        #
        out = JM_new_output_fileptr(path)
        self.this = mupdf.FzDocumentWriter(out, options, mupdf.FzDocumentWriter.OutputType_PDF)
        assert out.m_internal_value() == 0
        assert hasattr(self.this, '_out')

    def begin_page(self, mediabox):
        """Start a new page of size `mediabox` and return its device wrapper."""
        page_rect = JM_rect_from_py(mediabox)
        return DeviceWrapper(mupdf.fz_begin_page(self.this, page_rect))

    def close(self):
        """Close the underlying MuPDF document writer."""
        mupdf.fz_close_document_writer(self.this)

    def end_page(self):
        """Finish the page started by `begin_page()`."""
        mupdf.fz_end_page(self.this)
class Font:
    """Python wrapper around a MuPDF font, stored in `self.this`."""

    def __del__(self):
        if type(self) is not Font:
            return None

    def __init__(
            self,
            fontname=None,
            fontfile=None,
            fontbuffer=None,
            script=0,
            language=None,
            ordering=-1,
            is_bold=0,
            is_italic=0,
            is_serif=0,
            embed=1,
            ):
        """Create a font from a name, a file or a memory buffer.

        Args:
            fontname: font name. Some names are special-cased (compared in
                lower case): "cjk", "china-t", "china-ts" and names starting
                with "china-s", "korea" or "japan" set `ordering`; names
                found in `fitz_fontdescriptors` are loaded from the optional
                `pymupdf_fonts` package; otherwise, if `ordering` is
                negative, the name is mapped through `Base14_fontdict`.
            fontfile: font file, passed on to `JM_get_font()`.
            fontbuffer: font data as bytes, bytearray or an object with a
                ``getvalue()`` method.
            script, language, ordering, is_bold, is_italic, is_serif, embed:
                passed on to `JM_get_font()` (`language` after conversion
                by `mupdf.fz_text_language_from_string()`).

        Raises:
            ValueError: if `fontbuffer` is given but is not bytes-like in
                one of the accepted forms.
        """
        if fontbuffer:
            if hasattr(fontbuffer, "getvalue"):
                fontbuffer = fontbuffer.getvalue()
            elif isinstance(fontbuffer, bytearray):
                fontbuffer = bytes(fontbuffer)
            if not isinstance(fontbuffer, bytes):
                raise ValueError("bad type: 'fontbuffer'")

        if isinstance(fontname, str):
            fname_lower = fontname.lower()
            if "/" in fname_lower or "\\" in fname_lower or "." in fname_lower:
                message("Warning: did you mean a fontfile?")

            if fname_lower in ("cjk", "china-t", "china-ts"):
                ordering = 0
            elif fname_lower.startswith("china-s"):
                ordering = 1
            elif fname_lower.startswith("korea"):
                ordering = 3
            elif fname_lower.startswith("japan"):
                ordering = 2
            elif fname_lower in fitz_fontdescriptors.keys():
                import pymupdf_fonts  # optional fonts
                fontbuffer = pymupdf_fonts.myfont(fname_lower)  # make a copy
                fontname = None  # ensure using fontbuffer only
                del pymupdf_fonts  # remove package again
            elif ordering < 0:
                fontname = Base14_fontdict.get(fontname, fontname)

        lang = mupdf.fz_text_language_from_string(language)
        font = JM_get_font(fontname, fontfile,
                fontbuffer, script, lang, ordering,
                is_bold, is_italic, is_serif, embed)
        self.this = font

    def __repr__(self):
        return "Font('%s')" % self.name

    def _encode(self, ucs, script, lang, small_caps):
        """Return ``(glyph id, font)`` for unicode `ucs`.

        With `small_caps` the glyph is looked up in this font only;
        otherwise MuPDF's fallback lookup is used, which also returns the
        font that actually provides the glyph.
        """
        if small_caps:
            # The font is bound unconditionally so callers can never hit an
            # unbound `font`, whatever glyph id comes back.
            return mupdf.fz_encode_character_sc(self.this, ucs), self.this
        return mupdf.fz_encode_character_with_fallback(self.this, ucs, script, lang)

    @property
    def ascender(self):
        """Return the glyph ascender value."""
        return mupdf.fz_font_ascender(self.this)

    @property
    def bbox(self):
        """Font bounding box as returned by `fz_font_bbox()`."""
        return self.this.fz_font_bbox()

    @property
    def buffer(self):
        """Copy of the font's data buffer."""
        buffer_ = mupdf.FzBuffer(mupdf.ll_fz_keep_buffer(self.this.m_internal.buffer))
        return mupdf.fz_buffer_extract_copy(buffer_)

    def char_lengths(self, text, fontsize=11, language=None, script=0, wmode=0, small_caps=0):
        """Return list of char lengths of unicode 'text' under a fontsize."""
        lang = mupdf.fz_text_language_from_string(language)
        rc = []
        for ch in text:
            gid, font = self._encode(ord(ch), script, lang, small_caps)
            rc.append(fontsize * mupdf.fz_advance_glyph(font, gid, wmode))
        return rc

    @property
    def descender(self):
        """Return the glyph descender value."""
        return mupdf.fz_font_descender(self.this)

    @property
    def flags(self):
        """Font flags as a dict of names to flag values.

        Returns None if MuPDF reports no flags object.
        """
        f = mupdf.ll_fz_font_flags(self.this.m_internal)
        if not f:
            return
        assert isinstance(f, mupdf.fz_font_flags_t)
        #log( '{=f}')
        if mupdf_cppyy:
            # cppyy includes remaining higher bits, so the bit-fields are
            # peeled off one by one, lowest first.
            bits = [f.is_mono]

            def take(width):
                value = bits[0] & ((1 << width) - 1)
                bits[0] >>= width
                return value

            # Field order and widths follow MuPDF's `fz_font_flags_t`
            # declaration: `cjk` is its own 1-bit field ahead of the 2-bit
            # `cjk_lang` - TODO confirm against the bundled headers.
            # Dict values are evaluated top to bottom, i.e. in bit order.
            return {
                "mono":         take(1),
                "serif":        take(1),
                "bold":         take(1),
                "italic":       take(1),
                "substitute":   take(1),
                "stretch":      take(1),
                "fake-bold":    take(1),
                "fake-italic":  take(1),
                "opentype":     take(1),
                "invalid-bbox": take(1),
                'cjk':          take(1),
                'cjk-lang':     take(2),
                'embed':        take(1),
                'never-embed':  take(1),
            }
        return {
            "mono":         f.is_mono,
            "serif":        f.is_serif,
            "bold":         f.is_bold,
            "italic":       f.is_italic,
            "substitute":   f.ft_substitute,
            "stretch":      f.ft_stretch,
            "fake-bold":    f.fake_bold,
            "fake-italic":  f.fake_italic,
            "opentype":     f.has_opentype,
            "invalid-bbox": f.invalid_bbox,
            'cjk':          f.cjk,
            'cjk-lang':     f.cjk_lang,
            'embed':        f.embed,
            'never-embed':  f.never_embed,
        }

    def glyph_advance(self, chr_, language=None, script=0, wmode=0, small_caps=0):
        """Return the glyph width of a unicode (font size 1)."""
        lang = mupdf.fz_text_language_from_string(language)
        gid, font = self._encode(chr_, script, lang, small_caps)
        return mupdf.fz_advance_glyph(font, gid, wmode)

    def glyph_bbox(self, char, language=None, script=0, small_caps=0):
        """Return the glyph bbox of a unicode (font size 1)."""
        lang = mupdf.fz_text_language_from_string(language)
        gid, font = self._encode(char, script, lang, small_caps)
        return Rect(mupdf.fz_bound_glyph(font, gid, mupdf.FzMatrix()))

    @property
    def glyph_count(self):
        """Number of glyphs as recorded in the MuPDF font structure."""
        return self.this.m_internal.glyph_count

    def glyph_name_to_unicode(self, name):
        """Return the unicode for a glyph name."""
        return glyph_name_to_unicode(name)

    def has_glyph(self, chr, language=None, script=0, fallback=0, small_caps=0):
        """Check whether font has a glyph for this unicode.

        Returns the glyph id found by MuPDF. With `fallback` the lookup may
        be satisfied by a fallback font; `small_caps` only applies when
        `fallback` is off.
        """
        if fallback:
            lang = mupdf.fz_text_language_from_string(language)
            gid, font = mupdf.fz_encode_character_with_fallback(self.this, chr, script, lang)
        elif small_caps:
            gid = mupdf.fz_encode_character_sc(self.this, chr)
        else:
            gid = mupdf.fz_encode_character(self.this, chr)
        return gid

    @property
    def is_bold(self):
        return mupdf.fz_font_is_bold(self.this)

    @property
    def is_italic(self):
        return mupdf.fz_font_is_italic(self.this)

    @property
    def is_monospaced(self):
        return mupdf.fz_font_is_monospaced(self.this)

    @property
    def is_serif(self):
        return mupdf.fz_font_is_serif(self.this)

    @property
    def is_writable(self):
        """Always True."""
        # See pymupdf commit ef4056ee4da2. The former checks (type 3 procs,
        # substitute flag, pdf_font_writing_supported) sat behind this
        # return and were unreachable, so they have been removed.
        return True

    @property
    def name(self):
        """Font name as reported by MuPDF."""
        ret = mupdf.fz_font_name(self.this)
        #log( '{ret=}')
        return ret

    def text_length(self, text, fontsize=11, language=None, script=0, wmode=0, small_caps=0):
        """Return length of unicode 'text' under a fontsize.

        Raises:
            TypeError: if `text` is not a str.
        """
        lang = mupdf.fz_text_language_from_string(language)
        rc = 0
        if not isinstance(text, str):
            raise TypeError(MSG_BAD_TEXT)
        for ch in text:
            gid, font = self._encode(ord(ch), script, lang, small_caps)
            rc += mupdf.fz_advance_glyph(font, gid, wmode)
        rc *= fontsize
        return rc

    def unicode_to_glyph_name(self, ch):
        """Return the glyph name for a unicode."""
        return unicode_to_glyph_name(ch)

    def valid_codepoints(self):
        '''
        Returns sorted list of valid unicodes of a fz_font.
        '''
        ucs_gids = mupdf.fz_enumerate_font_cmap2(self.this)
        # Several glyphs may map to the same unicode: de-duplicate first.
        return sorted({item.ucs for item in ucs_gids})
class Graftmap:
    """Holder of a MuPDF graft map created for a destination PDF."""

    def __del__(self):
        # Only exact Graftmap instances are touched, not subclasses.
        if type(self) is Graftmap:
            self.thisown = False

    def __init__(self, doc):
        """Create a new graft map for the PDF underlying `doc`."""
        self.this = mupdf.pdf_new_graft_map(_as_pdf_document(doc))
        self.thisown = True
class Link:
    """Python wrapper around a `mupdf.FzLink`.

    Besides `this`, instances carry `parent` (the owning page), `thisown`,
    and - assigned from outside `__init__` - `xref` and `id`.
    """

    def __del__(self):
        self._erase()

    def __init__(self, this):
        assert isinstance(this, mupdf.FzLink)
        self.this = this

    def __repr__(self):
        CheckParent(self)
        return "link on " + str(self.parent)

    def __str__(self):
        CheckParent(self)
        return "link on " + str(self.parent)

    def _border(self, doc, xref):
        """Return border info of the link object `xref`, None if unavailable."""
        pdf = _as_pdf_document(doc, required=0)
        if not pdf.m_internal:
            return
        link_obj = mupdf.pdf_new_indirect(pdf, xref, 0)
        if not link_obj.m_internal:
            return
        return JM_annot_border(link_obj)

    def _colors(self, doc, xref):
        """Return color info of the link object `xref`.

        Returns None for a non-PDF; raises ValueError if the object cannot
        be created.
        """
        pdf = _as_pdf_document(doc, required=0)
        if not pdf.m_internal:
            return
        link_obj = mupdf.pdf_new_indirect(pdf, xref, 0)
        if not link_obj.m_internal:
            raise ValueError(MSG_BAD_XREF)
        return JM_annot_colors(link_obj)

    def _erase(self):
        """Detach this link from its page."""
        self.parent = None
        self.thisown = False

    def _setBorder(self, border, doc, xref):
        """Apply `border` to the link object `xref`; None if unavailable."""
        pdf = _as_pdf_document(doc, required=0)
        if not pdf.m_internal:
            return
        link_obj = mupdf.pdf_new_indirect(pdf, xref, 0)
        if not link_obj.m_internal:
            return
        return JM_annot_set_border(border, pdf, link_obj)

    @property
    def border(self):
        return self._border(self.parent.parent.this, self.xref)

    @property
    def colors(self):
        return self._colors(self.parent.parent.this, self.xref)

    @property
    def dest(self):
        """Create link destination details."""
        if hasattr(self, "parent") and self.parent is None:
            raise ValueError("orphaned object: parent is None")
        if self.parent.parent.is_closed or self.parent.parent.is_encrypted:
            raise ValueError("document closed or encrypted")
        doc = self.parent.parent

        # External links and "#..." URIs are passed on unresolved.
        if self.is_external or self.uri.startswith("#"):
            uri = None
        else:
            uri = doc.resolve_link(self.uri)

        return linkDest(self, uri, doc)

    @property
    def flags(self)->int:
        """Value of the link's "F" key as int; 0 if absent or not a PDF."""
        CheckParent(self)
        doc = self.parent.parent
        if not doc.is_pdf:
            return 0
        f = doc.xref_get_key(self.xref, "F")
        if f[1] != "null":
            return int(f[1])
        return 0

    @property
    def is_external(self):
        """Flag the link as external."""
        CheckParent(self)
        if g_use_extra:
            return extra.Link_is_external(self.this)
        this_link = self.this
        if not this_link.m_internal or not this_link.m_internal.uri:
            return False
        return bool(mupdf.fz_is_external_link(this_link.m_internal.uri))

    @property
    def next(self):
        """Next link.

        Returns None at the end of the chain. The new Link inherits this
        link's parent page and is registered in the page's `_annot_refs`.
        """
        if not self.this.m_internal:
            return None
        CheckParent(self)
        # The `extra` implementation is deliberately disabled here.
        if 0 and g_use_extra:
            val = extra.Link_next(self.this)
        else:
            val = self.this.next()
        if not val.m_internal:
            return None
        val = Link(val)
        val.thisown = True
        val.parent = self.parent  # copy owning page from prev link
        val.parent._annot_refs[id(val)] = val
        if self.xref > 0:  # prev link has an xref
            # Query the page's annotations once and keep only the links;
            # the successor is the entry after this link's own xref.
            links = [x for x in self.parent.annot_xrefs() if x[1] == mupdf.PDF_ANNOT_LINK]
            idx = [x[0] for x in links].index(self.xref)
            successor = links[idx + 1]
            val.xref = successor[0]
            val.id = successor[2]
        else:
            val.xref = 0
            val.id = ""
        return val

    @property
    def rect(self):
        """Rectangle ('hot area')."""
        CheckParent(self)
        # utils.py:getLinkDict() appears to expect exceptions from us, so we
        # ensure that we raise on error.
        if self.this is None or not self.this.m_internal:
            raise Exception('self.this.m_internal not available')
        val = JM_py_from_rect(self.this.rect())
        return Rect(val)

    def set_border(self, border=None, width=0, dashes=None, style=None):
        """Set the link border.

        `border` is used as given if it is a dict; otherwise a dict is
        built from `width`, `style` and `dashes`.
        """
        if type(border) is not dict:
            border = {"width": width, "style": style, "dashes": dashes}
        return self._setBorder(border, self.parent.parent.this, self.xref)

    def set_colors(self, colors=None, stroke=None, fill=None):
        """Set border colors.

        `colors` is used as given if it is a dict; otherwise a dict is
        built from `stroke` and `fill`. A fill color only triggers a
        warning message. An empty list / tuple as stroke writes an empty
        "C" array.
        """
        CheckParent(self)
        doc = self.parent.parent
        if type(colors) is not dict:
            colors = {"fill": fill, "stroke": stroke}
        fill = colors.get("fill")
        stroke = colors.get("stroke")
        if fill is not None:
            message("warning: links have no fill color")
        if stroke in ([], ()):
            doc.xref_set_key(self.xref, "C", "[]")
            return
        if hasattr(stroke, "__float__"):
            stroke = [float(stroke)]
        CheckColor(stroke)
        assert len(stroke) in (1, 3, 4)
        s = f"[{_format_g(stroke)}]"
        doc.xref_set_key(self.xref, "C", s)

    def set_flags(self, flags):
        """Write `flags` to the link's "F" key.

        Raises:
            ValueError: if the document is not a PDF or `flags` is not an
                int.
        """
        CheckParent(self)
        doc = self.parent.parent
        if not doc.is_pdf:
            raise ValueError("is no PDF")
        if not type(flags) is int:
            raise ValueError("bad 'flags' value")
        doc.xref_set_key(self.xref, "F", str(flags))
        return None

    @property
    def uri(self):
        """Uri string."""
        #CheckParent(self)
        if g_use_extra:
            return extra.link_uri(self.this)
        this_link = self.this
        return this_link.m_internal.uri if this_link.m_internal else ''

    page = -1
+class Matrix:
+def __abs__(self):
+return math.sqrt(sum([c*c for c in self]))
+def __add__(self, m):
+if hasattr(m, "__float__"):
+return Matrix(self.a + m, self.b + m, self.c + m,
+self.d + m, self.e + m, self.f + m)
+if len(m) != 6:
+raise ValueError("Matrix: bad seq len")
+return Matrix(self.a + m[0], self.b + m[1], self.c + m[2],
+self.d + m[3], self.e + m[4], self.f + m[5])
+def __bool__(self):
+return not (max(self) == min(self) == 0)
+def __eq__(self, mat):
+if not hasattr(mat, "__len__"):
+return False
+return len(mat) == 6 and not (self - mat)
+def __getitem__(self, i):
+return (self.a, self.b, self.c, self.d, self.e, self.f)[i]
+def __init__(self, *args, a=None, b=None, c=None, d=None, e=None, f=None):
+"""
+Matrix() - all zeros
+Matrix(a, b, c, d, e, f)
+Matrix(zoom-x, zoom-y) - zoom
+Matrix(shear-x, shear-y, 1) - shear
+Matrix(degree) - rotate
+Matrix(Matrix) - new copy
+Matrix(sequence) - from 'sequence'
+Matrix(mupdf.FzMatrix) - from MuPDF class wrapper for fz_matrix.
+Explicit keyword args a, b, c, d, e, f override any earlier settings if
+not None.
+"""
+if not args:
+self.a = self.b = self.c = self.d = self.e = self.f = 0.0
+elif len(args) > 6:
+raise ValueError("Matrix: bad seq len")
+elif len(args) == 6:  # 6 numbers
+self.a, self.b, self.c, self.d, self.e, self.f = map(float, args)
+elif len(args) == 1:  # either an angle or a sequ
+if isinstance(args[0], mupdf.FzMatrix):
+self.a = args[0].a
+self.b = args[0].b
+self.c = args[0].c
+self.d = args[0].d
+self.e = args[0].e
+self.f = args[0].f
+elif hasattr(args[0], "__float__"):
+theta = math.radians(args[0])
+c_ = round(math.cos(theta), 8)
+s_ = round(math.sin(theta), 8)
+self.a = self.d = c_
+self.b = s_
+self.c = -s_
+self.e = self.f = 0.0
+else:
+self.a, self.b, self.c, self.d, self.e, self.f = map(float, args[0])
+elif len(args) == 2 or len(args) == 3 and args[2] == 0:
+self.a, self.b, self.c, self.d, self.e, self.f = float(args[0]), \
+0.0, 0.0, float(args[1]), 0.0, 0.0
+elif len(args) == 3 and args[2] == 1:
+self.a, self.b, self.c, self.d, self.e, self.f = 1.0, \
+float(args[1]), float(args[0]), 1.0, 0.0, 0.0
+else:
+raise ValueError("Matrix: bad args")
+# Override with explicit args if specified.
+if a is not None:   self.a = a
+if b is not None:   self.b = b
+if c is not None:   self.c = c
+if d is not None:   self.d = d
+if e is not None:   self.e = e
+if f is not None:   self.f = f
+def __invert__(self):
+"""Calculate inverted matrix."""
+m1 = Matrix()
+m1.invert(self)
+return m1
+def __len__(self):
+return 6
+def __mul__(self, m):
+if hasattr(m, "__float__"):
+return Matrix(self.a * m, self.b * m, self.c * m,
+self.d * m, self.e * m, self.f * m)
+m1 = Matrix(1,1)
+return m1.concat(self, m)
+def __neg__(self):
+return Matrix(-self.a, -self.b, -self.c, -self.d, -self.e, -self.f)
+def __nonzero__(self):
+return not (max(self) == min(self) == 0)
+def __pos__(self):
+return Matrix(self)
+def __repr__(self):
+return "Matrix" + str(tuple(self))
+def __setitem__(self, i, v):
+v = float(v)
+if   i == 0: self.a = v
+elif i == 1: self.b = v
+elif i == 2: self.c = v
+elif i == 3: self.d = v
+elif i == 4: self.e = v
+elif i == 5: self.f = v
+else:
+raise IndexError("index out of range")
+return
+def __sub__(self, m):
+if hasattr(m, "__float__"):
+return Matrix(self.a - m, self.b - m, self.c - m,
+self.d - m, self.e - m, self.f - m)
+if len(m) != 6:
+raise ValueError("Matrix: bad seq len")
+return Matrix(self.a - m[0], self.b - m[1], self.c - m[2],
+self.d - m[3], self.e - m[4], self.f - m[5])
+def __truediv__(self, m):
+if hasattr(m, "__float__"):
+return Matrix(self.a * 1./m, self.b * 1./m, self.c * 1./m,
+self.d * 1./m, self.e * 1./m, self.f * 1./m)
+m1 = util_invert_matrix(m)[1]
+if not m1:
+raise ZeroDivisionError("matrix not invertible")
+m2 = Matrix(1,1)
+return m2.concat(self, m1)
+def concat(self, one, two):
+"""Multiply two matrices and replace current one."""
+if not len(one) == len(two) == 6:
+raise ValueError("Matrix: bad seq len")
+self.a, self.b, self.c, self.d, self.e, self.f = util_concat_matrix(one, two)
+return self
def invert(self, src=None):
    """Replace this matrix by the inverse of `src` (default: of itself).

    Returns:
        0 on success. 1 when ``util_invert_matrix`` reports failure, in
        which case this matrix is left untouched.
    """
    source = self if src is None else src
    outcome = util_invert_matrix(source)
    if outcome[0] == 1:
        return 1
    self.a, self.b, self.c, self.d, self.e, self.f = outcome[1]
    return 0
@property
def is_rectilinear(self):
    """True if rectangles are mapped to rectangles.

    That is the case when either ``b`` and ``c`` or ``a`` and ``d`` are
    both smaller than EPSILON in absolute value.
    """
    if abs(self.b) < EPSILON and abs(self.c) < EPSILON:
        return True
    return abs(self.a) < EPSILON and abs(self.d) < EPSILON
def prerotate(self, theta):
    """Calculate pre rotation by `theta` degrees and replace current matrix.

    The angle is first normalised into [0, 360). Angles within EPSILON of
    0, 90, 180 and 270 are handled by swapping / negating components, all
    other angles go through sine and cosine. Returns `self`.
    """
    angle = float(theta)
    while angle < 0:
        angle += 360
    while angle >= 360:
        angle -= 360

    if abs(0 - angle) < EPSILON:
        return self  # nothing to rotate

    old_a, old_b, old_c, old_d = self.a, self.b, self.c, self.d
    if abs(90.0 - angle) < EPSILON:
        self.a = old_c
        self.b = old_d
        self.c = -old_a
        self.d = -old_b
    elif abs(180.0 - angle) < EPSILON:
        self.a = -old_a
        self.b = -old_b
        self.c = -old_c
        self.d = -old_d
    elif abs(270.0 - angle) < EPSILON:
        self.a = -old_c
        self.b = -old_d
        self.c = old_a
        self.d = old_b
    else:
        radians = math.radians(angle)
        sin_t = math.sin(radians)
        cos_t = math.cos(radians)
        self.a = cos_t * old_a + sin_t * old_c
        self.b = cos_t * old_b + sin_t * old_d
        self.c = -sin_t * old_a + cos_t * old_c
        self.d = -sin_t * old_b + cos_t * old_d
    return self
def prescale(self, sx, sy):
    """Calculate pre scaling and replace current matrix.

    ``a`` and ``b`` are multiplied by `sx`, ``c`` and ``d`` by `sy`.
    Returns `self`.
    """
    factor_x = float(sx)
    factor_y = float(sy)
    self.a = self.a * factor_x
    self.b = self.b * factor_x
    self.c = self.c * factor_y
    self.d = self.d * factor_y
    return self
def preshear(self, h, v):
    """Calculate pre shearing and replace current matrix.

    Adds `v` times (c, d) to (a, b) and `h` times the previous (a, b) to
    (c, d). Returns `self`.
    """
    shear_h = float(h)
    shear_v = float(v)
    old_a = self.a
    old_b = self.b
    self.a = self.a + shear_v * self.c
    self.b = self.b + shear_v * self.d
    self.c = self.c + shear_h * old_a
    self.d = self.d + shear_h * old_b
    return self
def pretranslate(self, tx, ty):
    """Calculate pre translation and replace current matrix.

    Only ``e`` and ``f`` change; each receives the offset (`tx`, `ty`)
    weighted by the current a/c and b/d components. Returns `self`.
    """
    shift_x = float(tx)
    shift_y = float(ty)
    self.e = self.e + (shift_x * self.a + shift_y * self.c)
    self.f = self.f + (shift_x * self.b + shift_y * self.d)
    return self
+__inv__ = __invert__  # second name for the inversion method defined above
+__div__ = __truediv__  # Python 2 spelling of the division hook; same implementation as __truediv__
+norm = __abs__  # public alias of __abs__, which is defined outside this excerpt
class IdentityMatrix(Matrix):
    """Identity matrix [1, 0, 0, 1, 0, 0]

    Attribute assignment is intercepted: whatever value is assigned, ``a``
    and ``d`` always end up as 1.0 and ``b``, ``c``, ``e``, ``f`` as 0.0.
    """

    def __init__(self):
        Matrix.__init__(self, 1.0, 1.0)

    def __hash__(self):
        return hash((1, 0, 0, 1, 0, 0))

    def __repr__(self):
        return "IdentityMatrix(1.0, 0.0, 0.0, 1.0, 0.0, 0.0)"

    def __setattr__(self, name, value):
        # Substring tests on purpose, exactly as the component names are
        # spelled: the six matrix components get their identity value,
        # any other attribute is stored as given.
        if name in "ad":
            stored = 1.0
        elif name in "bcef":
            stored = 0.0
        else:
            stored = value
        self.__dict__[name] = stored

    def checkargs(*args):
        """Always raise: the identity matrix must not be modified."""
        raise NotImplementedError("Identity is readonly")


# Shared module-level instance of the identity matrix.
Identity = IdentityMatrix()
class linkDest:
    """link or outline destination details

    The constructor inspects `obj.uri`, `obj.page` and `obj.is_external`
    and derives these attributes: `kind` (one of the LINK_* constants),
    `page`, `uri`, `lt`, `flags`, `named`, `file_spec` and `is_uri`.
    The attributes `dest`, `is_map`, `new_window` and `rb` are only set to
    their defaults here.
    """

    def __init__(self, obj, rlink, document=None):
        """Analyse the destination of `obj`.

        Args:
            obj: object providing `is_external`, `page` and `uri`.
            rlink: optional sequence; when truthy and the URI does not
                start with '#', the URI is replaced by
                '#page=<rlink[0]+1>&zoom=0,<rlink[1]>,<rlink[2]>'.
            document: used to resolve '#nameddest=...' destinations via
                `document.resolve_names()`.
        """
        is_external = obj.is_external
        is_internal = not is_external
        self.dest = ""
        self.file_spec = ""
        self.flags = 0
        self.is_map = False
        self.is_uri = False
        self.kind = LINK_NONE
        self.lt = Point(0, 0)
        self.named = dict()
        self.new_window = ""
        self.page = obj.page
        self.rb = Point(0, 0)
        self.uri = obj.uri

        def uri_to_dict(uri):
            """Split 'k1=v1&k2=v2' into a dict; items without '=' map to None.

            The text to parse is the `uri` argument. (Previously the
            argument was ignored in favour of `self.uri[1:]`, which cut off
            the first character whenever the caller passed the full URI.)
            """
            ret = dict()
            for item in uri.split('&'):
                key, sep, value = item.partition('=')
                ret[key] = value if sep else None
            return ret

        def unescape(name):
            """Unescape '%AB' substrings to chr(0xAB)."""
            split = name.replace("%%", "%25")  # take care of escaped '%'
            split = split.split("%")
            newname = split[0]
            for item in split[1:]:
                piece = item[:2]
                newname += chr(int(piece, base=16))
                newname += item[2:]
            return newname

        if rlink and not self.uri.startswith("#"):
            self.uri = f"#page={rlink[0] + 1}&zoom=0,{_format_g(rlink[1])},{_format_g(rlink[2])}"
        if is_external:
            self.page = -1
            self.kind = LINK_URI
        if not self.uri:
            self.page = -1
            self.kind = LINK_NONE

        if is_internal and self.uri:
            self.uri = self.uri.replace("&zoom=nan", "&zoom=0")
            if self.uri.startswith("#"):
                self.kind = LINK_GOTO
                m = re.match('^#page=([0-9]+)&zoom=([0-9.]+),(-?[0-9.]+),(-?[0-9.]+)$', self.uri)
                if m:
                    # Page number plus a target point (left, top).
                    self.page = int(m.group(1)) - 1
                    self.lt = Point(float(m.group(3)), float(m.group(4)))
                    self.flags = self.flags | LINK_FLAG_L_VALID | LINK_FLAG_T_VALID
                else:
                    m = re.match('^#page=([0-9]+)$', self.uri)
                    if m:
                        self.page = int(m.group(1)) - 1
                    else:
                        self.kind = LINK_NAMED
                        m = re.match('^#nameddest=(.*)', self.uri)
                        assert document
                        if document and m:
                            named = unescape(m.group(1))
                            self.named = document.resolve_names().get(named)
                            if self.named is None:
                                # document.resolve_names() does not contain an
                                # entry for `named` so use an empty dict.
                                self.named = dict()
                            self.named['nameddest'] = named
                        else:
                            self.named = uri_to_dict(self.uri[1:])
            else:
                self.kind = LINK_NAMED
                self.named = uri_to_dict(self.uri)

        if is_external:
            if not self.uri:
                pass
            elif self.uri.startswith("file:"):
                self.file_spec = self.uri[5:]
                if self.file_spec.startswith("//"):
                    self.file_spec = self.file_spec[2:]
                self.is_uri = False
                self.uri = ""
                self.kind = LINK_LAUNCH
                ftab = self.file_spec.split("#")
                if len(ftab) == 2:
                    if ftab[1].startswith("page="):
                        # 'file:...#page=N...' points into another document.
                        self.kind = LINK_GOTOR
                        self.file_spec = ftab[0]
                        self.page = int(ftab[1].split("&")[0][5:]) - 1
            elif ":" in self.uri:
                self.is_uri = True
                self.kind = LINK_URI
            else:
                self.is_uri = True
                self.kind = LINK_LAUNCH
        assert isinstance(self.named, dict)
class Widget:
    '''
    Class describing a PDF form field ("widget")

    A new instance only carries default attribute values. The methods that
    talk to the PDF (`update`, `reset`, `next`, `_sync_flags`,
    `button_states`) additionally use `_annot` and / or `parent`, which
    are not set in `__init__` - presumably they are attached by the code
    that binds the widget to a page annotation.
    '''

    def __init__(self):
        self.border_color = None
        self.border_style = "S"
        self.border_width = 0
        self.border_dashes = None
        self.choice_values = None  # choice fields only
        self.rb_parent = None   # radio buttons only: xref of owning parent

        self.field_name = None  # field name
        self.field_label = None # field label
        self.field_value = None
        self.field_flags = 0
        self.field_display = 0
        self.field_type = 0  # valid range 1 through 7
        self.field_type_string = None  # field type as string

        self.fill_color = None
        self.button_caption = None  # button caption
        self.is_signed = None  # True / False if signature
        self.text_color = (0, 0, 0)
        self.text_font = "Helv"
        self.text_fontsize = 0
        self.text_maxlen = 0  # text fields only
        self.text_format = 0  # text fields only
        self._text_da = ""  # /DA = default appearance

        self.script = None  # JavaScript (/A)
        self.script_stroke = None  # JavaScript (/AA/K)
        self.script_format = None  # JavaScript (/AA/F)
        self.script_change = None  # JavaScript (/AA/V)
        self.script_calc = None  # JavaScript (/AA/C)
        self.script_blur = None  # JavaScript (/AA/Bl)
        self.script_focus = None  # JavaScript (/AA/Fo) codespell:ignore

        self.rect = None  # annot value
        self.xref = 0  # annot value

    def __repr__(self):
        # `self.parent` may not exist, so only type and script are shown.
        return f'Widget:(field_type={self.field_type_string} script={self.script})'

    def _adjust_font(self):
        """Ensure text_font is from our list and correctly spelled.

        An empty or unknown name becomes "Helv"; a known name is rewritten
        with its canonical capitalisation.
        """
        valid_fonts = ("Cour", "TiRo", "Helv", "ZaDb")
        wanted = self.text_font.lower() if self.text_font else ""
        for f in valid_fonts:
            if wanted == f.lower():
                self.text_font = f
                return
        self.text_font = "Helv"

    def _checker(self):
        """Any widget type checks.

        Raises:
            ValueError: if `field_type` is not within 1..7.
        """
        if self.field_type not in range(1, 8):
            raise ValueError("bad field type")

        # if setting a radio button to ON, first set Off all buttons
        # in the group - this is not done by MuPDF:
        if self.field_type == mupdf.PDF_WIDGET_TYPE_RADIOBUTTON and self.field_value not in (False, "Off") and hasattr(self, "parent"):
            # so we are about setting this button to ON/True
            # check other buttons in same group and set them to 'Off'
            doc = self.parent.parent
            kids_type, kids_value = doc.xref_get_key(self.xref, "Parent/Kids")
            if kids_type == "array":
                xrefs = tuple(map(int, kids_value[1:-1].replace("0 R","").split()))
                for xref in xrefs:
                    if xref != self.xref:
                        doc.xref_set_key(xref, "AS", "/Off")
        # the calling method will now set the intended button to on and
        # will find everything prepared for correct functioning.

    def _parse_da(self):
        """Extract font name, size and color from default appearance string (/DA object).

        Equivalent to 'pdf_parse_default_appearance' function in MuPDF's 'pdf-annot.c'.
        Does nothing when `_text_da` is empty; otherwise sets `text_font`,
        `text_fontsize` and `text_color` and clears `_text_da`.
        """
        if not self._text_da:
            return
        font = "Helv"
        fsize = 0
        col = (0, 0, 0)
        dat = self._text_da.split()  # split on any whitespace
        for i, item in enumerate(dat):
            if item == "Tf":  # '/<font> <size> Tf'
                font = dat[i - 2][1:]
                fsize = float(dat[i - 1])
                dat[i] = dat[i-1] = dat[i-2] = ""
                continue
            if item == "g":  # unicolor text
                col = [(float(dat[i - 1]))]
                dat[i] = dat[i-1] = ""
                continue
            if item == "rg":  # RGB colored text
                col = [float(f) for f in dat[i - 3:i]]
                dat[i] = dat[i-1] = dat[i-2] = dat[i-3] = ""
                continue
        self.text_font = font
        self.text_fontsize = fsize
        self.text_color = col
        self._text_da = ""

    def _validate(self):
        """Validate the class entries.

        Normalises empty values (colors, widths, scripts) and finally runs
        `_checker()`.

        Raises:
            ValueError: for an infinite / empty rect, a missing field name
                or a script entry that is not a string.
        """
        if (self.rect.is_infinite
            or self.rect.is_empty
           ):
            raise ValueError("bad rect")

        if not self.field_name:
            raise ValueError("field name missing")

        if self.field_label == "Unnamed":
            self.field_label = None
        CheckColor(self.border_color)
        CheckColor(self.fill_color)
        if not self.text_color:
            self.text_color = (0, 0, 0)
        CheckColor(self.text_color)

        if not self.border_width:
            self.border_width = 0

        if not self.text_fontsize:
            self.text_fontsize = 0

        self.border_style = self.border_style.upper()[0:1]

        # standardize content of JavaScript entries
        btn_type = self.field_type in (
            mupdf.PDF_WIDGET_TYPE_BUTTON,
            mupdf.PDF_WIDGET_TYPE_CHECKBOX,
            mupdf.PDF_WIDGET_TYPE_RADIOBUTTON,
        )
        if not self.script:
            self.script = None
        elif type(self.script) is not str:
            raise ValueError("script content must be a string")

        # buttons cannot have the following script actions
        for attr in (
            "script_calc",
            "script_change",
            "script_format",
            "script_stroke",
            "script_blur",
            "script_focus",
        ):
            value = getattr(self, attr)
            if btn_type or not value:
                setattr(self, attr, None)
            elif type(value) is not str:
                raise ValueError(f"{attr} content must be a string")

        self._checker()  # any field_type specific checks

    def _sync_flags(self):
        """Propagate the field flags.

        If this widget has a "/Parent", set its field flags and that of all
        its /Kids widgets to the value of the current widget.
        Only possible for widgets existing in the PDF.

        Returns True or False.
        """
        if not self.xref:
            return False  # no xref: widget not in the PDF
        doc = self.parent.parent  # the owning document
        assert doc
        pdf = _as_pdf_document(doc)
        # load underlying PDF object
        pdf_widget = mupdf.pdf_load_object(pdf, self.xref)
        Parent = mupdf.pdf_dict_get(pdf_widget, PDF_NAME("Parent"))
        if not Parent.pdf_is_dict():
            return False  # no /Parent: nothing to do

        # put the field flags value into the parent field flags:
        Parent.pdf_dict_put_int(PDF_NAME("Ff"), self.field_flags)

        # also put that value into all kids of the Parent
        kids = Parent.pdf_dict_get(PDF_NAME("Kids"))
        if not kids.pdf_is_array():
            message("warning: malformed PDF, Parent has no Kids array")
            return False  # no /Kids: should never happen!

        for i in range(kids.pdf_array_len()):  # walk through all kids
            # access kid widget, and do some precautionary checks
            kid = kids.pdf_array_get(i)
            if not kid.pdf_is_dict():
                continue
            xref = kid.pdf_to_num()  # get xref of the kid
            if xref == self.xref:  # skip self widget
                continue
            subtype = kid.pdf_dict_get(PDF_NAME("Subtype"))
            if not subtype.pdf_to_name() == "Widget":
                continue
            # put the field flags value into the kid field flags:
            kid.pdf_dict_put_int(PDF_NAME("Ff"), self.field_flags)

        return True  # all done

    def button_states(self):
        """Return the on/off state names for button widgets.

        A button may have 'normal' or 'pressed down' appearances. While the 'Off'
        state is usually called like this, the 'On' state is often given a name
        relating to the functional context.

        Returns:
            None if the field type is neither 2 nor 5 or if the widget has
            no `parent`. Otherwise a dict with keys "normal" and "down";
            each value is a list of names taken from AP/N resp. AP/D, or
            None when that entry is neither a dict nor an xref.
        """
        if self.field_type not in (2, 5):
            return None  # no button type
        if not hasattr(self, "parent"):  # field does not yet exist on page
            return None
        doc = self.parent.parent
        xref = self.xref

        def state_names(key):
            """Names found in the appearance sub-dictionary `key`."""
            kind, value = doc.xref_get_key(xref, key)
            if kind == "dict":
                source = value[2:-2]  # strip the enclosing '<<' and '>>'
            elif kind == "xref":
                # 'elif' matters: the dict case must not be re-examined,
                # an empty dict would otherwise fail on indexing ''.
                source = doc.xref_object(int(value.split(" ")[0]))
            else:
                return None
            return [part.split()[0] for part in source.split("/")[1:]]

        return {"normal": state_names("AP/N"), "down": state_names("AP/D")}

    @property
    def next(self):
        return self._annot.next

    def on_state(self):
        """Return the "On" value for button widgets.

        This is the first name reported by `button_states()` that is not
        "Off"; the "normal" names are searched before the "down" names.

        If no such name exists (e.g. the button is new / being created),
        a warning is shown and True is returned. Returns None if the field
        type is neither 2 nor 5.
        """
        if self.field_type not in (2, 5):
            return None  # no checkbox or radio button
        bstate = self.button_states()
        if bstate is None:
            bstate = dict()
        for names in bstate.values():
            # an appearance without states is reported as None
            for v in names or ():
                if v != "Off":
                    return v
        message("warning: radio button has no 'On' value.")
        return True

    def reset(self):
        """Reset the field value to its default.
        """
        TOOLS._reset_widget(self._annot)

    def update(self, sync_flags=False):
        """Reflect Python object in the PDF.

        Args:
            sync_flags: if true, also call `_sync_flags()` afterwards to
                propagate the field flags to parent and kids.

        Raises:
            ValueError: from `_validate()`, or if `text_color` does not
                have 1, 3 or 4 components.
        """
        self._validate()

        self._adjust_font()  # ensure valid text_font name

        # now create the /DA string; the operator depends on the number
        # of color components (gray, RGB, CMYK)
        da_formats = {
            1: "{:g} g /{f:s} {s:g} Tf",
            3: "{:g} {:g} {:g} rg /{f:s} {s:g} Tf",
            4: "{:g} {:g} {:g} {:g} k /{f:s} {s:g} Tf",
        }
        fmt = da_formats.get(len(self.text_color))
        if fmt is None:
            raise ValueError("text_color must have 1, 3 or 4 components")
        self._text_da = fmt.format(*self.text_color, f=self.text_font,
                                    s=self.text_fontsize)

        # if widget has a '/AA/C' script, make sure it is in the '/CO'
        # array of the '/AcroForm' dictionary.
        if self.script_calc:  # there is a "calculation" script:
            # make sure we are in the /CO array
            util_ensure_widget_calc(self._annot)

        # finally update the widget
        TOOLS._save_widget(self._annot, self)
        self._text_da = ""
        if sync_flags:
            self._sync_flags()  # propagate field flags to parent and kids
+from . import _extra
class Outline:
    """Python wrapper around a MuPDF outline item.

    `this` holds the wrapped object. Most properties read directly from
    its `m_internal` structure.
    """

    __slots__ = ['this']

    def __init__(self, ol):
        self.this = ol

    @property
    def dest(self):
        '''outline destination details'''
        return linkDest(self, None, None)

    def destination(self, document):
        '''
        Like `dest` property but uses `document` to resolve destinations for
        kind=LINK_NAMED.
        '''
        return linkDest(self, None, document)

    @property
    def down(self):
        """The item returned by the wrapped `down()`, or None if it is empty."""
        down_ol = self.this.down()
        if not down_ol.m_internal:
            return
        return Outline(down_ol)

    @property
    def is_external(self):
        """Whether the item's URI is an external link (False without URI)."""
        if g_use_extra:
            # calling _extra.* here appears to save significant time in
            # test_toc.py:test_full_toc, 1.2s=>0.94s.
            #
            return _extra.Outline_is_external( self.this)
        ol = self.this
        if not ol.m_internal:
            return False
        uri = ol.m_internal.uri
        if uri is None:
            return False
        return mupdf.fz_is_external_link(uri)

    @property
    def is_open(self):
        return self.this.m_internal.is_open

    @property
    def next(self):
        """The item returned by the wrapped `next()`, or None if it is empty."""
        next_ol = self.this.next()
        if not next_ol.m_internal:
            return
        return Outline(next_ol)

    @property
    def page(self):
        return self.this.m_internal.page.page

    @property
    def title(self):
        return self.this.m_internal.title

    @property
    def uri(self):
        """The item's URI, or None if the wrapped object is empty."""
        ol = self.this
        if not ol.m_internal:
            return None
        return ol.m_internal.uri

    @property
    def x(self):
        return self.this.m_internal.x

    @property
    def y(self):
        return self.this.m_internal.y
def _make_PdfFilterOptions(
        recurse=0,
        instance_forms=0,
        ascii=0,
        no_update=0,
        sanitize=0,
        sopts=None,
        ):
    '''
    Returns a mupdf.PdfFilterOptions instance.

    The first four arguments are copied to the attributes of the same name.
    If `sanitize` is true, a sanitize filter factory is added which uses
    `sopts` (or a default mupdf.PdfSanitizeFilterOptions if `sopts` is
    falsy).
    '''
    pdf_filter = mupdf.PdfFilterOptions()
    pdf_filter.recurse = recurse
    pdf_filter.instance_forms = instance_forms
    pdf_filter.ascii = ascii
    pdf_filter.no_update = no_update

    if not sanitize:
        return pdf_filter

    # We want to use a PdfFilterFactory whose `.filter` fn pointer is
    # set to MuPDF's `pdf_new_sanitize_filter()`. But not sure how to
    # get access to this raw fn in Python; and on Windows raw MuPDF
    # functions are not even available to C++.
    #
    # So we use SWIG Director to implement our own
    # PdfFilterFactory whose `filter()` method calls
    # `mupdf.ll_pdf_new_sanitize_filter()`.
    if not sopts:
        sopts = mupdf.PdfSanitizeFilterOptions()
    else:
        assert isinstance(sopts, mupdf.PdfSanitizeFilterOptions)

    class Factory(mupdf.PdfFilterFactory2):
        def __init__(self):
            super().__init__()
            self.use_virtual_filter()
            self.sopts = sopts

        def filter(self, ctx, doc, chain, struct_parents, transform, options):
            if 0:
                log(f'sanitize filter.filter():')
                log(f'    {self=}')
                log(f'    {ctx=}')
                log(f'    {doc=}')
                log(f'    {chain=}')
                log(f'    {struct_parents=}')
                log(f'    {transform=}')
                log(f'    {options=}')
                log(f'    {self.sopts.internal()=}')
            sanitize_options = self.sopts.internal()
            return mupdf.ll_pdf_new_sanitize_filter(
                    doc,
                    chain,
                    struct_parents,
                    transform,
                    options,
                    sanitize_options,
                    )

    factory = Factory()
    pdf_filter.add_factory(factory.internal())
    # The factory is also stored on the returned object - presumably to
    # keep it alive for as long as the options are in use.
    pdf_filter._factory = factory
    return pdf_filter
+class Page:
def __init__(self, page, document):
    """Wrap a MuPDF page object.

    Args:
        page: a mupdf.FzPage or mupdf.PdfPage.
        document: stored as `parent`.

    `number` is read from the page's internal structure, or set to None
    when `page.m_internal` is falsy.
    """
    assert isinstance(page, (mupdf.FzPage, mupdf.PdfPage)), f'page is: {page}'
    self.this = page
    self.thisown = True
    self.last_point = None
    self.draw_cont = ''
    self._annot_refs = dict()
    self.parent = document
    internal = page.m_internal
    if not internal:
        self.number = None
    elif isinstance(page, mupdf.PdfPage):
        self.number = internal.super.number
    else:
        self.number = internal.number
+def __repr__(self):
+return self.__str__()
+CheckParent(self)
+x = self.parent.name
+if self.parent.stream is not None:
+x = "<memory, doc# %i>" % (self.parent._graft_id,)
+if x == "":
+x = "<new PDF, doc# %i>" % self.parent._graft_id
+return "page %s of %s" % (self.number, x)
def __str__(self):
    """Return 'page <n>' or, when a parent is set, 'page <n> of <document>'.

    The document part is the parent's name, a '<memory, ...>' tag when the
    parent has a stream, or a '<new PDF, ...>' tag when the name is empty.
    """
    #CheckParent(self)
    parent = getattr(self, 'parent', None)
    internal = self.this.m_internal
    if isinstance(internal, mupdf.pdf_page):
        number = internal.super.number
    else:
        number = self.this.m_internal.number
    if not parent:
        return f'page {number}'
    label = self.parent.name
    if self.parent.stream is not None:
        label = "<memory, doc# %i>" % (self.parent._graft_id,)
    if label == "":
        label = "<new PDF, doc# %i>" % self.parent._graft_id
    return f'page {number}' + f' of {label}'
def _add_caret_annot(self, point):
    """Create a Caret annotation and return the raw annotation object.

    With `g_use_extra` set the work is delegated to
    `extra._add_caret_annot`. Otherwise the annotation is created here
    and, if `point` is truthy, its rectangle is moved so that its top-left
    corner is at `point` while keeping its size.
    """
    if g_use_extra:
        return extra._add_caret_annot( self.this, JM_point_from_py(point))

    pdf_page = self._pdf_page()
    caret = mupdf.pdf_create_annot(pdf_page, mupdf.PDF_ANNOT_CARET)
    if point:
        origin = JM_point_from_py(point)
        box = mupdf.pdf_annot_rect(caret)
        moved = mupdf.FzRect(
                origin.x,
                origin.y,
                origin.x + box.x1 - box.x0,
                origin.y + box.y1 - box.y0,
                )
        mupdf.pdf_set_annot_rect(caret, moved)
    mupdf.pdf_update_annot(caret)
    JM_add_annot_id(caret, "A")
    return caret
def _add_file_annot(self, point, buffer_, filename, ufilename=None, desc=None, icon=None):
    """Create a FileAttachment annotation at `point` and return it as Annot.

    `buffer_` supplies the file content. `ufilename` and `desc` default to
    `filename` when falsy.

    Raises:
        TypeError: if no buffer could be made from `buffer_`.
    """
    pdf_page = self._pdf_page()
    unicode_name = ufilename or filename
    description = desc or filename
    origin = JM_point_from_py(point)
    content = JM_BufferFromBytes(buffer_)
    if not content.m_internal:
        raise TypeError( MSG_BAD_BUFFER)

    annot = mupdf.pdf_create_annot(pdf_page, mupdf.PDF_ANNOT_FILE_ATTACHMENT)
    box = mupdf.pdf_annot_rect(annot)
    # keep the size of the initial rectangle, move it to `point`
    box = mupdf.fz_make_rect(
            origin.x,
            origin.y,
            origin.x + box.x1 - box.x0,
            origin.y + box.y1 - box.y0,
            )
    mupdf.pdf_set_annot_rect(annot, box)
    annot_flags = mupdf.PDF_ANNOT_IS_PRINT
    mupdf.pdf_set_annot_flags(annot, annot_flags)

    if icon:
        mupdf.pdf_set_annot_icon_name(annot, icon)

    filespec = JM_embed_file(pdf_page.doc(), content, filename, unicode_name, description, 1)
    mupdf.pdf_dict_put(mupdf.pdf_annot_obj(annot), PDF_NAME('FS'), filespec)
    mupdf.pdf_dict_put_text_string(mupdf.pdf_annot_obj(annot), PDF_NAME('Contents'), filename)
    mupdf.pdf_update_annot(annot)
    # rectangle and flags are applied once more after the update
    mupdf.pdf_set_annot_rect(annot, box)
    mupdf.pdf_set_annot_flags(annot, annot_flags)
    JM_add_annot_id(annot, "A")
    return Annot(annot)
+def _add_freetext_annot(  # create a FreeText annotation on this PDF page; returns it wrapped in Annot
+self, rect,
+text,
+fontsize=11,
+fontname=None,
+text_color=None,
+fill_color=None,
+border_color=None,
+border_width=0,
+dashes=None,
+callout=None,
+line_end=mupdf.PDF_ANNOT_LE_OPEN_ARROW,
+opacity=1,
+align=0,
+rotate=0,
+richtext=False,
+style=None,
+):
+rc = f"""<?xml version="1.0"?>
+<body xmlns="http://www.w3.org/1999/xtml"
+xmlns:xfa="http://www.xfa.org/schema/xfa-data/1.0/"
+xfa:contentType="text/html" xfa:APIVersion="Acrobat:8.0.0" xfa:spec="2.4">
+{text}"""
+page = self._pdf_page()
+if border_color and not richtext:  # a border color is only accepted together with richtext
+raise ValueError("cannot set border_color if rich_text is False")
+if border_color and not text_color:  # text color falls back to the border color
+text_color = border_color
+nfcol, fcol = JM_color_FromSequence(fill_color)  # presumably (component count, components) - see fcol[:nfcol] below
+ntcol, tcol = JM_color_FromSequence(text_color)
+r = JM_rect_from_py(rect)
+if mupdf.fz_is_infinite_rect(r) or mupdf.fz_is_empty_rect(r):  # reject infinite or empty rectangles
+raise ValueError( MSG_BAD_RECT)
+annot = mupdf.pdf_create_annot(page, mupdf.PDF_ANNOT_FREE_TEXT)
+annot_obj = mupdf.pdf_annot_obj(annot)
+#insert text as 'contents' or 'RC' depending on 'richtext'
+if not richtext:
+mupdf.pdf_set_annot_contents(annot, text)
+else:
+mupdf.pdf_dict_put_text_string(annot_obj,PDF_NAME("RC"), rc)  # the XML wrapper built at the top goes to /RC
+if style:
+mupdf.pdf_dict_put_text_string(annot_obj,PDF_NAME("DS"), style)  # `style` is stored under /DS
+mupdf.pdf_set_annot_rect(annot, r)
+while rotate < 0:  # normalise the rotation into [0, 360)
+rotate += 360
+while rotate >= 360:
+rotate -= 360
+if rotate != 0:  # /Rotate is only written for a non-zero rotation
+mupdf.pdf_dict_put_int(annot_obj, PDF_NAME('Rotate'), rotate)
+mupdf.pdf_set_annot_quadding(annot, align)
+if nfcol > 0:  # only set a color when a fill color was given
+mupdf.pdf_set_annot_color(annot, fcol[:nfcol])
+mupdf.pdf_set_annot_border_width(annot, border_width)
+mupdf.pdf_set_annot_opacity(annot, opacity)
+if dashes:
+for d in dashes:  # one border dash item per entry, converted to float
+mupdf.pdf_add_annot_border_dash_item(annot, float(d))
+# Insert callout information
+if callout:
+mupdf.pdf_dict_put(annot_obj, PDF_NAME("IT"), PDF_NAME("FreeTextCallout"))
+mupdf.pdf_set_annot_callout_style(annot, line_end)
+point_count = len(callout)
+extra.JM_set_annot_callout_line(annot, tuple(callout), point_count)
+# insert the default appearance string
+if not richtext:
+JM_make_annot_DA(annot, ntcol, tcol, fontname, fontsize)
+mupdf.pdf_update_annot(annot)
+JM_add_annot_id(annot, "A")
+val = Annot(annot)
+return val
def _add_ink_annot(self, list):
    """Create an Ink annotation from a sequence of strokes; return it as Annot.

    Each stroke is a sequence of 2-item points. Every point is transformed
    with the inverse of the page transformation matrix before it is stored
    in the /InkList array.

    Raises:
        ValueError: if `list` is no sequence or a point is not a 2-item
            sequence.
    """
    pdf_page = _as_pdf_page(self.this)
    if not PySequence_Check(list):
        raise ValueError( MSG_BAD_ARG_INK_ANNOT)

    ctm = mupdf.FzMatrix()
    mupdf.pdf_page_transform(pdf_page, mupdf.FzRect(0), ctm)
    inv_ctm = mupdf.fz_invert_matrix(ctm)

    annot = mupdf.pdf_create_annot(pdf_page, mupdf.PDF_ANNOT_INK)
    annot_obj = mupdf.pdf_annot_obj(annot)
    stroke_count = len(list)
    inklist = mupdf.pdf_new_array(pdf_page.doc(), stroke_count)

    for stroke_index in range(stroke_count):
        points = list[stroke_index]
        point_count = len(points)
        stroke = mupdf.pdf_new_array(pdf_page.doc(), 2 * point_count)
        for point_index in range(point_count):
            raw = points[point_index]
            if not PySequence_Check(raw) or PySequence_Size(raw) != 2:
                raise ValueError( MSG_BAD_ARG_INK_ANNOT)
            mapped = mupdf.fz_transform_point(JM_point_from_py(raw), inv_ctm)
            mupdf.pdf_array_push_real(stroke, mapped.x)
            mupdf.pdf_array_push_real(stroke, mapped.y)
        mupdf.pdf_array_push(inklist, stroke)

    mupdf.pdf_dict_put(annot_obj, PDF_NAME('InkList'), inklist)
    mupdf.pdf_update_annot(annot)
    JM_add_annot_id(annot, "A")
    return Annot(annot)
def _add_line_annot(self, p1, p2):
    """Create a Line annotation from `p1` to `p2` and return it as Annot."""
    pdf_page = self._pdf_page()
    line = mupdf.pdf_create_annot(pdf_page, mupdf.PDF_ANNOT_LINE)
    start = JM_point_from_py(p1)
    end = JM_point_from_py(p2)
    mupdf.pdf_set_annot_line(line, start, end)
    mupdf.pdf_update_annot(line)
    JM_add_annot_id(line, "A")
    assert line.m_internal
    return Annot(line)
def _add_multiline(self, points, annot_type):
    """Create an annotation of `annot_type` with one vertex per point.

    Raises:
        ValueError: for fewer than two points or a point that does not
            have exactly two items.
    """
    pdf_page = self._pdf_page()
    if len(points) < 2:
        raise ValueError( MSG_BAD_ARG_POINTS)
    annot = mupdf.pdf_create_annot(pdf_page, annot_type)
    for raw in points:
        if PySequence_Size(raw) != 2:
            raise ValueError( MSG_BAD_ARG_POINTS)
        vertex = JM_point_from_py(raw)
        mupdf.pdf_add_annot_vertex(annot, vertex)

    mupdf.pdf_update_annot(annot)
    JM_add_annot_id(annot, "A")
    return Annot(annot)
+def _add_redact_annot(self, quad, text=None, da_str=None, align=0, fill=None, text_color=None):  # create a Redact annotation for `quad`, returned as Annot; `text_color` is not referenced in this body
+page = self._pdf_page()
+fcol = [ 1, 1, 1, 0]  # defaults; both names are replaced below when `fill` is given
+nfcol = 0
+annot = mupdf.pdf_create_annot(page, mupdf.PDF_ANNOT_REDACT)
+q = JM_quad_from_py(quad)
+r = mupdf.fz_rect_from_quad(q)  # the annotation rectangle is derived from the quad
+# TODO calculate de-rotated rect
+mupdf.pdf_set_annot_rect(annot, r)
+if fill:
+nfcol, fcol = JM_color_FromSequence(fill)
+arr = mupdf.pdf_new_array(page.doc(), nfcol)
+for i in range(nfcol):  # copy the first nfcol color components into the array
+mupdf.pdf_array_push_real(arr, fcol[i])
+mupdf.pdf_dict_put(mupdf.pdf_annot_obj(annot), PDF_NAME('IC'), arr)  # store the fill components under /IC
+if text:
+assert da_str  # overlay text needs a default appearance string
+mupdf.pdf_dict_puts(
+mupdf.pdf_annot_obj(annot),
+"OverlayText",
+mupdf.pdf_new_text_string(text),
+)
+mupdf.pdf_dict_put_text_string(mupdf.pdf_annot_obj(annot), PDF_NAME('DA'), da_str)
+mupdf.pdf_dict_put_int(mupdf.pdf_annot_obj(annot), PDF_NAME('Q'), align)  # /Q receives the `align` value
+mupdf.pdf_update_annot(annot)
+JM_add_annot_id(annot, "A")
+annot = mupdf.ll_pdf_keep_annot(annot.m_internal)  # NOTE(review): presumably takes an extra reference for the re-wrapped object below - confirm
+annot = mupdf.PdfAnnot( annot)
+return Annot(annot)
def _add_square_or_circle(self, rect, annot_type):
    """Create an annotation of `annot_type` with rectangle `rect`.

    Raises:
        ValueError: if `rect` is infinite or empty.
    """
    pdf_page = self._pdf_page()
    box = JM_rect_from_py(rect)
    if mupdf.fz_is_infinite_rect(box) or mupdf.fz_is_empty_rect(box):
        raise ValueError( MSG_BAD_RECT)
    shape = mupdf.pdf_create_annot(pdf_page, annot_type)
    mupdf.pdf_set_annot_rect(shape, box)
    mupdf.pdf_update_annot(shape)
    JM_add_annot_id(shape, "A")
    assert shape.m_internal
    return Annot(shape)
def _add_stamp_annot(self, rect, stamp=0):
    """Create a Stamp annotation inside `rect` and return it as Annot.

    Args:
        rect: rectangle for the stamp.
        stamp: an index into the list of standard stamp names, or image
            data given as Pixmap, file path (str), bytes / bytearray or
            io.BytesIO. Any other value selects the first standard name.

    Raises:
        ValueError: if `rect` is infinite or empty.
    """
    rect = Rect(rect)
    box = JM_rect_from_py(rect)
    if mupdf.fz_is_infinite_rect(box) or mupdf.fz_is_empty_rect(box):
        raise ValueError(MSG_BAD_RECT)
    pdf_page = self._pdf_page()
    stamp_id = [
            "Approved",
            "AsIs",
            "Confidential",
            "Departmental",
            "Experimental",
            "Expired",
            "Final",
            "ForComment",
            "ForPublicRelease",
            "NotApproved",
            "NotForPublicRelease",
            "Sold",
            "TopSecret",
            "Draft",
            ]
    image_data = None
    name = None
    if stamp in range(len(stamp_id)):
        name = stamp_id[stamp]
    elif isinstance(stamp, Pixmap):
        image_data = stamp.tobytes()
    elif isinstance(stamp, str):
        image_data = pathlib.Path(stamp).read_bytes()
    elif isinstance(stamp, (bytes, bytearray)):
        image_data = stamp
    elif isinstance(stamp, io.BytesIO):
        image_data = stamp.getvalue()
    else:
        name = stamp_id[0]

    annot = mupdf.pdf_create_annot(pdf_page, mupdf.PDF_ANNOT_STAMP)
    if not image_data:  # text stamp
        mupdf.pdf_set_annot_rect(annot, box)
        mupdf.pdf_dict_put(mupdf.pdf_annot_obj(annot), PDF_NAME("Name"), PDF_NAME(name))
        mupdf.pdf_set_annot_contents(annot, name)
    else:  # image stamp
        fzbuff = mupdf.fz_new_buffer_from_copied_data(image_data)
        img = mupdf.fz_new_image_from_buffer(fzbuff)

        # compute image boundary box on page: scale the image so that it
        # fits into "rect", then center it there
        img_w, img_h = img.w(), img.h()
        scale = min(rect.width / img_w, rect.height / img_h)
        width = img_w * scale  # bbox width
        height = img_h * scale  # bbox height

        center = (rect.tl + rect.br) / 2  # center of "rect"
        x0 = center.x - width / 2
        y0 = center.y - height / 2
        box = mupdf.fz_make_rect(x0, y0, x0 + width, y0 + height)
        mupdf.pdf_set_annot_rect(annot, box)
        mupdf.pdf_set_annot_stamp_image(annot, img)
        mupdf.pdf_dict_put(mupdf.pdf_annot_obj(annot), PDF_NAME("Name"), mupdf.pdf_new_name("ImageStamp"))
        mupdf.pdf_set_annot_contents(annot, "Image Stamp")

    mupdf.pdf_update_annot(annot)
    JM_add_annot_id(annot, "A")
    return Annot(annot)
def _add_text_annot(self, point, text, icon=None):
    """Create a Text annotation at `point` with contents `text`.

    The initial annotation rectangle keeps its size and is moved so that
    its top-left corner is at `point`. `icon`, if truthy, is set as the
    icon name. Returns the annotation as Annot.
    """
    pdf_page = self._pdf_page()
    origin = JM_point_from_py( point)
    note = mupdf.pdf_create_annot(pdf_page, mupdf.PDF_ANNOT_TEXT)
    box = mupdf.pdf_annot_rect(note)
    box = mupdf.fz_make_rect(
            origin.x,
            origin.y,
            origin.x + box.x1 - box.x0,
            origin.y + box.y1 - box.y0,
            )
    mupdf.pdf_set_annot_rect(note, box)
    mupdf.pdf_set_annot_contents(note, text)
    if icon:
        mupdf.pdf_set_annot_icon_name(note, icon)
    mupdf.pdf_update_annot(note)
    JM_add_annot_id(note, "A")
    return Annot(note)
def _add_text_marker(self, quads, annot_type):
    """Create a text marker annotation via `Page__add_text_marker`.

    A truthy result gets a weak proxy of this page as `parent` and is
    registered in `_annot_refs`; a falsy result yields None.

    Raises:
        ValueError: if the parent document is no PDF.
    """
    CheckParent(self)
    if not self.parent.is_pdf:
        raise ValueError("is no PDF")

    marker = Page__add_text_marker(self, quads, annot_type)
    if marker:
        marker.parent = weakref.proxy(self)
        self._annot_refs[id(marker)] = marker
        return marker
    return None
def _addAnnot_FromString(self, linklist):
    """Add links from list of object sources.

    Args:
        linklist: tuple of PDF object source strings. Each one is added to
            the document as a new object and referenced from the page's
            /Annots array (which is created if missing). Items that are
            empty or cannot be processed are skipped with a message.

    Raises:
        ValueError: if a non-empty `linklist` is not a tuple.
    """
    CheckParent(self)
    if g_use_extra:
        # NOTE(review): this rebinds the method on the class to the
        # `extra` function for later calls - confirm that is intended.
        self.__class__._addAnnot_FromString = extra.Page_addAnnot_FromString
        #log('Page._addAnnot_FromString() deferring to extra.Page_addAnnot_FromString().')
        return extra.Page_addAnnot_FromString( self.this, linklist)
    page = _as_pdf_page(self.this)
    lcount = len(linklist)  # link count
    if lcount < 1:
        return

    # insert links from the provided sources
    if not isinstance(linklist, tuple):
        raise ValueError( "bad 'linklist' argument")
    if not mupdf.pdf_dict_get( page.obj(), PDF_NAME('Annots')).m_internal:
        mupdf.pdf_dict_put_array( page.obj(), PDF_NAME('Annots'), lcount)
    annots = mupdf.pdf_dict_get( page.obj(), PDF_NAME('Annots'))
    assert annots.m_internal, f'{lcount=} {annots.m_internal=}'
    for i, txtpy in enumerate(linklist):
        text = JM_StrAsChar(txtpy)
        if not text:
            # Format the index into the text, as done in the handler below;
            # passing it as a second argument did not do that.
            message("skipping bad link / annot item %i." % i)
            continue
        try:
            annot = mupdf.pdf_add_object( page.doc(), JM_pdf_obj_from_str( page.doc(), text))
            ind_obj = mupdf.pdf_new_indirect( page.doc(), mupdf.pdf_to_num( annot), 0)
            mupdf.pdf_array_push( annots, ind_obj)
        except Exception:
            # best effort: report and continue with the next item
            if g_exceptions_verbose:
                exception_info()
            message("skipping bad link / annot item %i.\n" % i)
def _addWidget(self, field_type, field_name):
    """Create a form field via `JM_create_widget` and return it as Annot.

    Raises:
        RuntimeError: if no widget annotation was created.
    """
    pdf_page = self._pdf_page()
    pdf_doc = pdf_page.doc()
    widget_annot = JM_create_widget(pdf_doc, pdf_page, field_type, field_name)
    if widget_annot.m_internal:
        JM_add_annot_id(widget_annot, "W")
        return Annot(widget_annot)
    raise RuntimeError( "cannot create widget")
def _apply_redactions(self, text, images, graphics):
    """Run MuPDF's page redaction and return its result.

    Args:
        text: how to treat text.
        images: how to treat images.
        graphics: how to treat vector graphics.
    """
    pdf_page = self._pdf_page()
    redact_options = mupdf.PdfRedactOptions()
    redact_options.black_boxes = 0  # no black boxes
    redact_options.text = text
    redact_options.image_method = images
    redact_options.line_art = graphics
    return mupdf.pdf_redact_page(pdf_page.doc(), pdf_page, redact_options)
+def _erase(self):
+self._reset_annot_refs()
+try:
+self.parent._forget_page(self)
+except Exception:
+exception_info()
+pass
+self.parent = None
+self.thisown = False
+self.number = None
+self.this = None
def _count_q_balance(self):
    """Count missing graphic state pushs and pops.

    Returns:
        A pair of integers (push, pop). Push is the number of missing
        PDF "q" commands, pop is the number of "Q" commands.
        A balanced graphics state for the page will be reached if its
        /Contents is prepended with 'push' copies of string "q\n"
        and appended with 'pop' copies of "\nQ".
    """
    pdf_page = _as_pdf_page(self)  # the underlying PDF page
    page_obj = pdf_page.obj()
    resources = mupdf.pdf_dict_get(page_obj, mupdf.PDF_ENUM_NAME_Resources)
    contents = mupdf.pdf_dict_get(pdf_page.obj(), mupdf.PDF_ENUM_NAME_Contents)
    pdf_doc = _as_pdf_document(self.parent)  # the underlying PDF document
    # the result of the MuPDF function is returned as is
    return mupdf.pdf_count_q_balance_outparams_fn(pdf_doc, resources, contents)
def _get_optional_content(self, oc: OptInt) -> OptStr:
    """Return the resource property name that refers to xref `oc`.

    Returns None when `oc` is None or 0. If the page's resource properties
    already contain `oc`, the existing name is returned; otherwise the
    first unused name "MC0", "MC1", ... is registered and returned.

    Raises:
        ValueError: if the object at `oc` is neither an OCG nor an OCMD.
    """
    if oc is None or oc == 0:
        return None
    source = self.parent.xref_object(oc, compressed=True)
    if "/Type/OCG" not in source and "/Type/OCMD" not in source:
        #log( 'raising "bad optional content"')
        raise ValueError("bad optional content: 'oc'")

    #log( 'Looking at self._get_resource_properties()')
    name_by_xref = {
        xref: name for name, xref in self._get_resource_properties()
    }
    if oc in name_by_xref:
        return name_by_xref[oc]

    used_names = name_by_xref.values()
    counter = 0
    while "MC%i" % counter in used_names:
        counter += 1
    mc = "MC%i" % counter
    self._set_resource_property(mc, oc)
    #log( 'returning {mc=}')
    return mc
def _get_resource_properties(self):
    """List the page's Resource/Properties.

    Returns:
        The result of JM_get_resource_properties() for the PDF page object.
    """
    return JM_get_resource_properties(self._pdf_page().obj())
def _get_textpage(self, clip=None, flags=0, matrix=None):
    """Create a MuPDF structured-text page for this page.

    Args:
        clip: rect-like area for the text page. If None, the page's
            bounding rectangle is used.
        flags: value used to construct the structured-text options.
        matrix: matrix-like transformation used when running the page.
    Returns:
        A mupdf.FzStextPage.
    """
    if g_use_extra:
        # the optimised implementation returns a low-level text page
        ll_tpage = extra.page_get_textpage(self.this, clip, flags, matrix)
        return mupdf.FzStextPage(ll_tpage)
    page = self.this
    options = mupdf.FzStextOptions(flags)
    # Default to page's rect if `clip` not specified, for #2048.
    # (An earlier unconditional conversion of `clip` was dead code: its
    # result was always overwritten by this line.)
    rect = mupdf.fz_bound_page(page) if clip is None else JM_rect_from_py(clip)
    ctm = JM_matrix_from_py(matrix)
    tpage = mupdf.FzStextPage(rect)
    dev = mupdf.fz_new_stext_device(tpage, options)
    if _globals.no_device_caching:
        mupdf.fz_enable_device_hints( dev, mupdf.FZ_NO_CACHE)
    if isinstance(page, mupdf.FzPage):
        pass
    elif isinstance(page, mupdf.PdfPage):
        # fz_run_page() is given the generic page object
        page = page.super()
    else:
        assert 0, f'Unrecognised {type(page)=}'
    mupdf.fz_run_page(page, dev, ctm, mupdf.FzCookie())
    mupdf.fz_close_device(dev)
    return tpage
def _insert_image(self,
        filename=None, pixmap=None, stream=None, imask=None, clip=None,
        overlay=1, rotate=0, keep_proportion=1, oc=0, width=0, height=0,
        xref=0, alpha=-1, _imgname=None, digests=None
        ):
    """Put an image on the page and reference it in the page contents.

    The image source is selected in this order: an existing image object
    (`xref` > 0), in-memory data (`stream`), a file (`filename`), and
    otherwise `pixmap`.

    The control flow emulates `goto` jumps with the `do_*` flags: each
    stage below only runs while its flag is still set, and earlier stages
    clear the flags of the stages that must be skipped.

    Args:
        filename: image file, read via mupdf.fz_read_file().
        pixmap: object whose `.this` is the pixmap to insert.
        stream: image data in memory.
        imask: data of a mask image; only used together with
            `stream` / `filename`.
        clip, rotate, keep_proportion: passed to calc_image_matrix().
        overlay: passed to JM_insert_contents().
        oc: if true, passed to JM_add_oc_object() for a new image.
        width, height: initial image dimensions; replaced by the actual
            values on most paths.
        xref: xref of an existing image to re-use.
        alpha: not referenced in this function body.
        _imgname: resource name under which the image is stored in the
            page's /XObject dictionary and which the content stream uses.
        digests: dict mapping image MD5 digests to xrefs. It is consulted
            to re-use already stored images and is updated in place when a
            new image is added.
    Returns:
        (img_xref, digests) if a new image object was added, else
        (img_xref, None).
    Raises:
        ValueError: if `xref` refers to an object whose width and height
            add up to 0, or if a mask is given for an image without a
            compressed buffer.
    """
    maskbuf = mupdf.FzBuffer()
    page = self._pdf_page()
    # This will create an empty PdfDocument with a call to
    # pdf_new_document() then assign page.doc()'s return value to it (which
    # drop the original empty pdf_document).
    pdf = page.doc()
    w = width
    h = height
    img_xref = xref
    rc_digest = 0   # becomes 1 once a new image object has been added

    # Stage switches emulating goto labels; all stages are enabled at first.
    do_process_pixmap = 1
    do_process_stream = 1
    do_have_imask = 1
    do_have_image = 1
    do_have_xref = 1

    if xref > 0:
        # Re-use an existing image object: only its dimensions are needed.
        ref = mupdf.pdf_new_indirect(pdf, xref, 0)
        w = mupdf.pdf_to_int( mupdf.pdf_dict_geta( ref, PDF_NAME('Width'), PDF_NAME('W')))
        h = mupdf.pdf_to_int( mupdf.pdf_dict_geta( ref, PDF_NAME('Height'), PDF_NAME('H')))
        if w + h == 0:
            raise ValueError( MSG_IS_NO_IMAGE)
        #goto have_xref()
        do_process_pixmap = 0
        do_process_stream = 0
        do_have_imask = 0
        do_have_image = 0

    else:
        if stream:
            imgbuf = JM_BufferFromBytes(stream)
            do_process_pixmap = 0
        else:
            if filename:
                imgbuf = mupdf.fz_read_file(filename)
                #goto have_stream()
                do_process_pixmap = 0

    if do_process_pixmap:
        #log( 'do_process_pixmap')
        # process pixmap ---------------------------------
        arg_pix = pixmap.this
        w = arg_pix.w()
        h = arg_pix.h()
        digest = mupdf.fz_md5_pixmap2(arg_pix)
        md5_py = digest
        temp = digests.get(md5_py, None)
        if temp is not None:
            # Same pixmap was stored before: just reference that object.
            img_xref = temp
            ref = mupdf.pdf_new_indirect(page.doc(), img_xref, 0)
            #goto have_xref()
            do_process_stream = 0
            do_have_imask = 0
            do_have_image = 0
        else:
            if arg_pix.alpha() == 0:
                image = mupdf.fz_new_image_from_pixmap(arg_pix, mupdf.FzImage())
            else:
                # Pixmap has an alpha channel: derive a mask image from it.
                pm = mupdf.fz_convert_pixmap(
                        arg_pix,
                        mupdf.FzColorspace(),
                        mupdf.FzColorspace(),
                        mupdf.FzDefaultColorspaces(None),
                        mupdf.FzColorParams(),
                        1,
                        )
                pm.alpha = 0
                pm.colorspace = None
                mask = mupdf.fz_new_image_from_pixmap(pm, mupdf.FzImage())
                image = mupdf.fz_new_image_from_pixmap(arg_pix, mask)
            #goto have_image()
            do_process_stream = 0
            do_have_imask = 0

    if do_process_stream:
        #log( 'do_process_stream')
        # process stream ---------------------------------
        # The digest covers the image data and, if present, the mask data.
        state = mupdf.FzMd5()
        if mupdf_cppyy:
            mupdf.fz_md5_update_buffer( state, imgbuf)
        else:
            mupdf.fz_md5_update(state, imgbuf.m_internal.data, imgbuf.m_internal.len)
        if imask:
            maskbuf = JM_BufferFromBytes(imask)
            if mupdf_cppyy:
                mupdf.fz_md5_update_buffer( state, maskbuf)
            else:
                mupdf.fz_md5_update(state, maskbuf.m_internal.data, maskbuf.m_internal.len)
        digest = mupdf.fz_md5_final2(state)
        md5_py = bytes(digest)
        temp = digests.get(md5_py, None)
        if temp is not None:
            # Same data was stored before: reference it and read its size.
            img_xref = temp
            ref = mupdf.pdf_new_indirect(page.doc(), img_xref, 0)
            w = mupdf.pdf_to_int( mupdf.pdf_dict_geta( ref, PDF_NAME('Width'), PDF_NAME('W')))
            h = mupdf.pdf_to_int( mupdf.pdf_dict_geta( ref, PDF_NAME('Height'), PDF_NAME('H')))
            #goto have_xref()
            do_have_imask = 0
            do_have_image = 0
        else:
            image = mupdf.fz_new_image_from_buffer(imgbuf)
            w = image.w()
            h = image.h()
            if not imask:
                #goto have_image()
                do_have_imask = 0

    if do_have_imask:
        # `fz_compressed_buffer` is reference counted and
        # `mupdf.fz_new_image_from_compressed_buffer2()`
        # is provided as a Swig-friendly wrapper for
        # `fz_new_image_from_compressed_buffer()`, so we can do things
        # straightforwardly.
        #
        cbuf1 = mupdf.fz_compressed_image_buffer( image)
        if not cbuf1.m_internal:
            raise ValueError( "uncompressed image cannot have mask")
        bpc = image.bpc()
        colorspace = image.colorspace()
        xres, yres = mupdf.fz_image_resolution(image)
        mask = mupdf.fz_new_image_from_buffer(maskbuf)
        # Rebuild the image from its compressed buffer, now with the mask.
        image = mupdf.fz_new_image_from_compressed_buffer2(
                w,
                h,
                bpc,
                colorspace,
                xres,
                yres,
                1,  # interpolate
                0,  # imagemask,
                list(), # decode
                list(), # colorkey
                cbuf1,
                mask,
                )

    if do_have_image:
        #log( 'do_have_image')
        # Store the new image in the PDF and remember its digest.
        ref = mupdf.pdf_add_image(pdf, image)
        if oc:
            JM_add_oc_object(pdf, ref, oc)
        img_xref = mupdf.pdf_to_num(ref)
        digests[md5_py] = img_xref
        rc_digest = 1

    if do_have_xref:
        #log( 'do_have_xref')
        # Make sure /Resources and /Resources/XObject exist on the page,
        # then register the image under the name `_imgname`.
        resources = mupdf.pdf_dict_get_inheritable(page.obj(), PDF_NAME('Resources'))
        if not resources.m_internal:
            resources = mupdf.pdf_dict_put_dict(page.obj(), PDF_NAME('Resources'), 2)
        xobject = mupdf.pdf_dict_get(resources, PDF_NAME('XObject'))
        if not xobject.m_internal:
            xobject = mupdf.pdf_dict_put_dict(resources, PDF_NAME('XObject'), 2)
        mat = calc_image_matrix(w, h, clip, rotate, keep_proportion)
        mupdf.pdf_dict_puts(xobject, _imgname, ref)
        # Content stream snippet: save state, set matrix, draw image, restore.
        nres = mupdf.fz_new_buffer(50)
        s = f"\nq\n{_format_g((mat.a, mat.b, mat.c, mat.d, mat.e, mat.f))} cm\n/{_imgname} Do\nQ\n"
        #s = s.replace('\n', '\r\n')
        mupdf.fz_append_string(nres, s)
        JM_insert_contents(pdf, page.obj(), nres, overlay)

    if rc_digest:
        return img_xref, digests
    else:
        return img_xref, None
def _insertFont(self, fontname, bfname, fontfile, fontbuffer, set_simple, idx, wmode, serif, encoding, ordering):
    """Insert a font into the PDF and reference it in the page's fonts.

    Args:
        fontname: name under which the font is stored in the page's
            /Resources/Font dictionary.
        bfname, fontfile, fontbuffer, set_simple, idx, wmode, serif,
        encoding, ordering: passed through to JM_insert_font().
    Returns:
        The value returned by JM_insert_font().
    Raises:
        RuntimeError: if that value carries no font xref.
    """
    pdf_page = self._pdf_page()
    pdf_doc = pdf_page.doc()
    font_info = JM_insert_font(pdf_doc, bfname, fontfile,fontbuffer, set_simple, idx, wmode, serif, encoding, ordering)

    # make sure the page has /Resources and /Resources/Font
    res_dict = mupdf.pdf_dict_get_inheritable(pdf_page.obj(), PDF_NAME('Resources'))
    if not res_dict.pdf_is_dict():
        res_dict = mupdf.pdf_dict_put_dict(pdf_page.obj(), PDF_NAME("Resources"), 5)
    font_dict = mupdf.pdf_dict_get(res_dict, PDF_NAME('Font'))
    if not font_dict.m_internal:
        # page has no fonts yet
        font_dict = mupdf.pdf_new_dict(pdf_doc, 5)
        mupdf.pdf_dict_putl(pdf_page.obj(), font_dict, PDF_NAME('Resources'), PDF_NAME('Font'))

    # the fonts dictionary receives a named reference to the font object
    _, font_xref = JM_INT_ITEM(font_info, 0)
    if not font_xref:
        raise RuntimeError( "cannot insert font")
    mupdf.pdf_dict_puts(font_dict, fontname, mupdf.pdf_new_indirect(pdf_doc, font_xref, 0))
    return font_info
def _load_annot(self, name, xref):
    """Look up an annotation of this page by name or by xref.

    Args:
        name: annotation name; only used if `xref` is 0.
        xref: annotation xref; takes precedence if not 0.
    Returns:
        An Annot, or None if the lookup delivered an empty object.
    """
    pdf_page = self._pdf_page()
    if xref == 0:
        lookup, key = JM_get_annot_by_name, name
    else:
        lookup, key = JM_get_annot_by_xref, xref
    found = lookup(pdf_page, key)
    return Annot(found) if found.m_internal else None
def _makePixmap(self, doc, ctm, cs, alpha=0, annots=1, clip=None):
    """Render this page via JM_pixmap_from_page() and wrap the result.

    All arguments are passed through unchanged, together with the
    underlying page object.

    Returns:
        A Pixmap.
    """
    return Pixmap(JM_pixmap_from_page(doc, self.this, ctm, cs, alpha, annots, clip))
def _other_box(self, boxtype):
    """Read one of the page's box entries.

    Args:
        boxtype: key of the box in the page object, e.g. "ArtBox".
    Returns:
        The rectangle converted by JM_py_from_rect(), or None if this is
        no PDF page or the entry is not an array (or is infinite).
    """
    pdf_page = _as_pdf_page(self.this, required=False)
    box = mupdf.FzRect( mupdf.FzRect.Fixed_INFINITE)  # "nothing found" marker
    if pdf_page.m_internal:
        entry = mupdf.pdf_dict_gets( pdf_page.obj(), boxtype)
        if mupdf.pdf_is_array(entry):
            box = mupdf.pdf_to_rect(entry)
    if mupdf.fz_is_infinite_rect( box):
        return None
    return JM_py_from_rect(box)
def _pdf_page(self, required=True):
    """Return the underlying page as delivered by _as_pdf_page().

    Args:
        required: passed through to _as_pdf_page().
    """
    underlying = self.this
    return _as_pdf_page(underlying, required=required)
+def _reset_annot_refs(self):
+"""Invalidate / delete all annots of this page."""
+self._annot_refs.clear()
+def _set_opacity(self, gstate=None, CA=1, ca=1, blendmode=None):
+if CA >= 1 and ca >= 1 and blendmode is None:
+return
+tCA = int(round(max(CA , 0) * 100))
+if tCA >= 100:
+tCA = 99
+tca = int(round(max(ca, 0) * 100))
+if tca >= 100:
+tca = 99
+gstate = "fitzca%02i%02i" % (tCA, tca)
+if not gstate:
+return
+page = _as_pdf_page(self.this)
+resources = mupdf.pdf_dict_get(page.obj(), PDF_NAME('Resources'))
+if not resources.m_internal:
+resources = mupdf.pdf_dict_put_dict(page.obj(), PDF_NAME('Resources'), 2)
+extg = mupdf.pdf_dict_get(resources, PDF_NAME('ExtGState'))
+if not extg.m_internal:
+extg = mupdf.pdf_dict_put_dict(resources, PDF_NAME('ExtGState'), 2)
+n = mupdf.pdf_dict_len(extg)
+for i in range(n):
+o1 = mupdf.pdf_dict_get_key(extg, i)
+name = mupdf.pdf_to_name(o1)
+if name == gstate:
+return gstate
+opa = mupdf.pdf_new_dict(page.doc(), 3)
+mupdf.pdf_dict_put_real(opa, PDF_NAME('CA'), CA)
+mupdf.pdf_dict_put_real(opa, PDF_NAME('ca'), ca)
+mupdf.pdf_dict_puts(extg, gstate, opa)
+return gstate
+def _set_pagebox(self, boxtype, rect):
+doc = self.parent
+if doc is None:
+raise ValueError("orphaned object: parent is None")
+if not doc.is_pdf:
+raise ValueError("is no PDF")
+valid_boxes = ("CropBox", "BleedBox", "TrimBox", "ArtBox")
+if boxtype not in valid_boxes:
+raise ValueError("bad boxtype")
+rect = Rect(rect)
+mb = self.mediabox
+rect = Rect(rect[0], mb.y1 - rect[3], rect[2], mb.y1 - rect[1])
+if not (mb.x0 <= rect.x0 < rect.x1 <= mb.x1 and mb.y0 <= rect.y0 < rect.y1 <= mb.y1):
+raise ValueError(f"{boxtype} not in MediaBox")
+doc.xref_set_key(self.xref, boxtype, f"[{_format_g(tuple(rect))}]")
def _set_resource_property(self, name, xref):
    """Hand `name` and `xref` to JM_set_resource_property() for this page."""
    page_obj = self._pdf_page().obj()
    JM_set_resource_property(page_obj, name, xref)
def _show_pdf_page(self, fz_srcpage, overlay=1, matrix=None, xref=0, oc=0, clip=None, graftmap=None, _imgname=None):
    """Show a source page on this page by means of Form XObjects.

    Args:
        fz_srcpage: source page, converted by JM_xobject_from_page().
        overlay: passed to JM_insert_contents().
        matrix: matrix-like for the referencing XObject.
        xref: xref passed to JM_xobject_from_page(); if not 0 it is also
            the return value.
        oc: if > 0, passed to JM_add_oc_object() for the referencing
            XObject.
        clip: rect-like used as the referencing XObject's box.
        graftmap: object whose `.this` is passed to JM_xobject_from_page().
        _imgname: resource name of the referencing XObject on this page.
    Returns:
        `xref` if it is not 0, else the xref of the source page's XObject.
    """
    clip_rect = JM_rect_from_py(clip)
    ctm = JM_matrix_from_py(matrix)
    target_page = _as_pdf_page(self.this)
    target_obj = target_page.obj()
    target_pdf = target_page.doc()
    ENSURE_OPERATION(target_pdf)

    # Step 1: turn the source page into a Form XObject.
    src_xobj = JM_xobject_from_page(target_pdf, fz_srcpage, xref, graftmap.this)
    result_xref = xref if xref else mupdf.pdf_to_num(src_xobj)

    # Step 2: build the referencing XObject, which controls the display
    # on the target page. Its /Resources point at the XObject of step 1.
    inner_xobjects = mupdf.pdf_new_dict(target_pdf, 5)
    mupdf.pdf_dict_puts(inner_xobjects, "fullpage", src_xobj)
    inner_resources = mupdf.pdf_new_dict(target_pdf, 5)
    mupdf.pdf_dict_put(inner_resources, PDF_NAME('XObject'), inner_xobjects)
    inner_stream = mupdf.fz_new_buffer(20)
    mupdf.fz_append_string(inner_stream, "/fullpage Do")
    ref_xobj = mupdf.pdf_new_xobject(target_pdf, clip_rect, ctm, inner_resources, inner_stream)
    if oc > 0:
        JM_add_oc_object(target_pdf, mupdf.pdf_resolve_indirect(ref_xobj), oc)

    # Step 3: register the referencing XObject in the page's /Resources.
    page_resources = mupdf.pdf_dict_get_inheritable(target_obj, PDF_NAME('Resources'))
    if not page_resources.m_internal:
        page_resources = mupdf.pdf_dict_put_dict(target_obj,PDF_NAME('Resources'), 5)
    page_xobjects = mupdf.pdf_dict_get(page_resources, PDF_NAME('XObject'))
    if not page_xobjects.m_internal:
        page_xobjects = mupdf.pdf_dict_put_dict(page_resources, PDF_NAME('XObject'), 5)
    mupdf.pdf_dict_puts(page_xobjects, _imgname, ref_xobj)

    # Step 4: create and insert the Do-command as new page content.
    do_command = mupdf.fz_new_buffer(50)
    for piece in (" q /", _imgname, " Do Q "):
        mupdf.fz_append_string(do_command, piece)
    JM_insert_contents(target_pdf, target_obj, do_command, overlay)
    return result_xref
def add_caret_annot(self, point: point_like) -> Annot:
    """Add a 'Caret' annotation.

    Args:
        point: point-like passed to the low-level creation method.
    Returns:
        The new Annot.
    """
    saved_rotation = annot_preprocess(self)
    try:
        raw_annot = self._add_caret_annot(point)
    finally:
        # put back the rotation reported by annot_preprocess()
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    caret = Annot( raw_annot)
    annot_postprocess(self, caret)
    assert hasattr( caret, 'parent')
    return caret
def add_circle_annot(self, rect: rect_like) -> Annot:
    """Add a 'Circle' (ellipse, oval) annotation.

    Args:
        rect: rect-like passed to the low-level creation method.
    Returns:
        The new annotation.
    """
    saved_rotation = annot_preprocess(self)
    try:
        circle = self._add_square_or_circle(rect, mupdf.PDF_ANNOT_CIRCLE)
    finally:
        # put back the rotation reported by annot_preprocess()
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, circle)
    return circle
def add_file_annot(
        self,
        point: point_like,
        buffer_: ByteString,
        filename: str,
        ufilename: OptStr =None,
        desc: OptStr =None,
        icon: OptStr =None
        ) -> Annot:
    """Add a 'FileAttachment' annotation.

    All arguments are handed unchanged to the low-level creation method.

    Returns:
        The new annotation.
    """
    extras = dict(ufilename=ufilename, desc=desc, icon=icon)
    saved_rotation = annot_preprocess(self)
    try:
        attachment = self._add_file_annot(point, buffer_, filename, **extras)
    finally:
        # put back the rotation reported by annot_preprocess()
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, attachment)
    return attachment
def add_freetext_annot(
        self,
        rect: rect_like,
        text: str,
        *,
        fontsize: float =11,
        fontname: OptStr =None,
        text_color: OptSeq =None,
        fill_color: OptSeq =None,
        border_color: OptSeq =None,
        border_width: float =0,
        dashes: OptSeq =None,
        callout: OptSeq =None,
        line_end: int=mupdf.PDF_ANNOT_LE_OPEN_ARROW,
        opacity: float =1,
        align: int =0,
        rotate: int =0,
        richtext=False,
        style=None,
        ) -> Annot:
    """Add a 'FreeText' annotation.

    All arguments are handed unchanged to the low-level creation method.

    Returns:
        The new annotation.
    """
    options = dict(
        fontsize=fontsize,
        fontname=fontname,
        text_color=text_color,
        fill_color=fill_color,
        border_color=border_color,
        border_width=border_width,
        dashes=dashes,
        callout=callout,
        line_end=line_end,
        opacity=opacity,
        align=align,
        rotate=rotate,
        richtext=richtext,
        style=style,
    )
    saved_rotation = annot_preprocess(self)
    try:
        freetext = self._add_freetext_annot(rect, text, **options)
    finally:
        # put back the rotation reported by annot_preprocess()
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, freetext)
    return freetext
def add_highlight_annot(self, quads=None, start=None,
        stop=None, clip=None) -> Annot:
    """Add a 'Highlight' annotation.

    Args:
        quads: marker area(s), checked by CheckMarkerArg(). If None, the
            area is determined by get_highlight_selection() from `start`,
            `stop` and `clip`.
    Returns:
        The new annotation.
    """
    if quads is not None:
        marker_quads = CheckMarkerArg(quads)
    else:
        marker_quads = get_highlight_selection(self, start=start, stop=stop, clip=clip)
    return self._add_text_marker(marker_quads, mupdf.PDF_ANNOT_HIGHLIGHT)
def add_ink_annot(self, handwriting: list) -> Annot:
    """Add an 'Ink' ('handwriting') annotation.

    Args:
        handwriting: a list of lists of point_likes.
    Returns:
        The new annotation.
    """
    saved_rotation = annot_preprocess(self)
    try:
        ink = self._add_ink_annot(handwriting)
    finally:
        # put back the rotation reported by annot_preprocess()
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, ink)
    return ink
def add_line_annot(self, p1: point_like, p2: point_like) -> Annot:
    """Add a 'Line' annotation.

    Args:
        p1, p2: point-likes passed to the low-level creation method.
    Returns:
        The new annotation.
    """
    saved_rotation = annot_preprocess(self)
    try:
        line = self._add_line_annot(p1, p2)
    finally:
        # put back the rotation reported by annot_preprocess()
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, line)
    return line
def add_polygon_annot(self, points: list) -> Annot:
    """Add a 'Polygon' annotation.

    Args:
        points: list passed to the low-level multi-line creation method.
    Returns:
        The new annotation.
    """
    saved_rotation = annot_preprocess(self)
    try:
        polygon = self._add_multiline(points, mupdf.PDF_ANNOT_POLYGON)
    finally:
        # put back the rotation reported by annot_preprocess()
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, polygon)
    return polygon
def add_polyline_annot(self, points: list) -> Annot:
    """Add a 'PolyLine' annotation.

    Args:
        points: list passed to the low-level multi-line creation method.
    Returns:
        The new annotation.
    """
    saved_rotation = annot_preprocess(self)
    try:
        polyline = self._add_multiline(points, mupdf.PDF_ANNOT_POLY_LINE)
    finally:
        # put back the rotation reported by annot_preprocess()
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, polyline)
    return polyline
def add_rect_annot(self, rect: rect_like) -> Annot:
    """Add a 'Square' (rectangle) annotation.

    Args:
        rect: rect-like passed to the low-level creation method.
    Returns:
        The new annotation.
    """
    saved_rotation = annot_preprocess(self)
    try:
        square = self._add_square_or_circle(rect, mupdf.PDF_ANNOT_SQUARE)
    finally:
        # put back the rotation reported by annot_preprocess()
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, square)
    return square
def add_redact_annot(
        self,
        quad,
        text: OptStr =None,
        fontname: OptStr =None,
        fontsize: float =11,
        align: int =0,
        fill: OptSeq =None,
        text_color: OptSeq =None,
        cross_out: bool =True,
        ) -> Annot:
    """Add a 'Redact' annotation.

    Args:
        quad: area to redact, passed to the low-level creation method.
        text: replacement text. Empty or whitespace-only text is treated
            as no text.
        fontname: font for the replacement text; defaults to "Helv".
        fontsize: font size for the replacement text; defaults to 11.
        align: passed to the low-level creation method.
        fill: fill color. With replacement text, None becomes white and a
            single number becomes a gray triple.
        text_color: color of the replacement text; defaults to black.
        cross_out: if true, the appearance is extended by extra lines
            between the rectangle corners.
    Returns:
        The new annotation.
    """
    da_str = None
    if not text or set(string.whitespace).issuperset(text):
        # nothing printable: the annotation carries no replacement text
        text = None
    else:
        CheckColor(fill)
        CheckColor(text_color)
        fontname = fontname or "Helv"
        fontsize = fontsize or 11
        text_color = text_color or (0, 0, 0)
        if hasattr(text_color, "__float__"):
            # a single number means a gray level
            text_color = (text_color,) * 3
        if len(text_color) > 3:
            text_color = text_color[:3]
        da_str = "{:g} {:g} {:g} rg /{f:s} {s:g} Tf".format(
            *text_color, f=fontname, s=fontsize
        )
        if fill is None:
            fill = (1, 1, 1)
        if fill:
            if hasattr(fill, "__float__"):
                fill = (fill,) * 3
            if len(fill) > 3:
                fill = fill[:3]

    saved_rotation = annot_preprocess(self)
    try:
        redaction = self._add_redact_annot(quad, text=text, da_str=da_str,
                align=align, fill=fill)
    finally:
        # put back the rotation reported by annot_preprocess()
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, redaction)

    if cross_out:
        # Change the appearance to show a crossed-out rectangle: keep all
        # appearance commands but the last one, then append further
        # commands re-using the four corner commands.
        commands = redaction._getAP().splitlines()[:-1]
        _, LL, LR, UR, UL = commands
        commands.extend((LR, LL, UR, LL, UL, b"S"))
        redaction._setAP(b"\n".join(commands), 0)
    return redaction
def add_squiggly_annot(
        self,
        quads=None,
        start=None,
        stop=None,
        clip=None,
        ) -> Annot:
    """Add a 'Squiggly' annotation.

    Args:
        quads: marker area(s), checked by CheckMarkerArg(). If None, the
            area is determined by get_highlight_selection() from `start`,
            `stop` and `clip`.
    Returns:
        The new annotation.
    """
    if quads is not None:
        marker_quads = CheckMarkerArg(quads)
    else:
        marker_quads = get_highlight_selection(self, start=start, stop=stop, clip=clip)
    return self._add_text_marker(marker_quads, mupdf.PDF_ANNOT_SQUIGGLY)
def add_stamp_annot(self, rect: rect_like, stamp=0) -> Annot:
    """Add a ('rubber') 'Stamp' annotation.

    Args:
        rect: rect-like passed to the low-level creation method.
        stamp: stamp selector passed to the low-level creation method.
    Returns:
        The new annotation.
    """
    saved_rotation = annot_preprocess(self)
    try:
        stamp_annot = self._add_stamp_annot(rect, stamp)
    finally:
        # put back the rotation reported by annot_preprocess()
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, stamp_annot)
    return stamp_annot
def add_strikeout_annot(self, quads=None, start=None, stop=None, clip=None) -> Annot:
    """Add a 'StrikeOut' annotation.

    Args:
        quads: marker area(s), checked by CheckMarkerArg(). If None, the
            area is determined by get_highlight_selection() from `start`,
            `stop` and `clip`.
    Returns:
        The new annotation.
    """
    if quads is not None:
        marker_quads = CheckMarkerArg(quads)
    else:
        marker_quads = get_highlight_selection(self, start=start, stop=stop, clip=clip)
    return self._add_text_marker(marker_quads, mupdf.PDF_ANNOT_STRIKE_OUT)
def add_text_annot(self, point: point_like, text: str, icon: str ="Note") -> Annot:
    """Add a 'Text' (sticky note) annotation.

    Args:
        point: point-like passed to the low-level creation method.
        text: the note's text.
        icon: icon name passed to the low-level creation method.
    Returns:
        The new annotation.
    """
    saved_rotation = annot_preprocess(self)
    try:
        note = self._add_text_annot(point, text, icon=icon)
    finally:
        # put back the rotation reported by annot_preprocess()
        if saved_rotation != 0:
            self.set_rotation(saved_rotation)
    annot_postprocess(self, note)
    return note
def add_underline_annot(self, quads=None, start=None, stop=None, clip=None) -> Annot:
    """Add an 'Underline' annotation.

    Args:
        quads: marker area(s), checked by CheckMarkerArg(). If None, the
            area is determined by get_highlight_selection() from `start`,
            `stop` and `clip`.
    Returns:
        The new annotation.
    """
    if quads is not None:
        marker_quads = CheckMarkerArg(quads)
    else:
        marker_quads = get_highlight_selection(self, start=start, stop=stop, clip=clip)
    return self._add_text_marker(marker_quads, mupdf.PDF_ANNOT_UNDERLINE)
def add_widget(self, widget: Widget) -> Annot:
    """Add a 'Widget' (form field).

    Args:
        widget: the Widget describing the field. It is validated, linked
            to the new annotation and then updated.
    Returns:
        The new annotation, or None if the created annotation is falsy.
    Raises:
        ValueError: if the document is no PDF.
    """
    CheckParent(self)
    if not self.parent.is_pdf:
        raise ValueError("is no PDF")
    widget._validate()
    new_annot = self._addWidget(widget.field_type, widget.field_name)
    if not new_annot:
        return None
    new_annot.thisown = True
    new_annot.parent = weakref.proxy(self)  # the owning page object
    self._annot_refs[id(new_annot)] = new_annot
    # connect the widget with its annotation before updating it
    widget.parent = new_annot.parent
    widget._annot = new_annot
    widget.update()
    return new_annot
def annot_names(self):
    """List of names of annotations, fields and links.

    Returns:
        The result of JM_get_annot_id_list() for the page, or an empty
        list if the page is not a PDF page.
    """
    # The original had two consecutive string literals here; only the first
    # one became the docstring. They are merged into the docstring above.
    CheckParent(self)
    page = self._pdf_page(required=False)
    if not page.m_internal:
        return []
    return JM_get_annot_id_list(page)
def annot_xrefs(self):
    """List of xref numbers of annotations, fields and links.

    Returns:
        The result of JM_get_annot_xref_list2() for this page.
    """
    xref_items = JM_get_annot_xref_list2(self)
    return xref_items
def annots(self, types=None):
    """Generator over the annotations of a page.

    Links, popups and widgets are never yielded.

    Args:
        types: (list) annotation types to subselect from. If it does not
            support item access (e.g. None), all annotations are returned.
            E.g. types=[PDF_ANNOT_LINE] will only yield line annotations.
    Yields:
        Annotations loaded via load_annot(); each one gets the attribute
        `_yielded` set to True.
    """
    skip_types = (mupdf.PDF_ANNOT_LINK, mupdf.PDF_ANNOT_POPUP, mupdf.PDF_ANNOT_WIDGET)
    restrict = hasattr(types, "__getitem__")
    # Collect the xrefs up front, before any annotation is loaded.
    selected_xrefs = [
        item[0]
        for item in self.annot_xrefs()
        if (not restrict or item[1] in types) and item[1] not in skip_types
    ]
    for annot_xref in selected_xrefs:
        loaded = self.load_annot(annot_xref)
        loaded._yielded=True
        yield loaded
def recolor(self, components=1):
    """Convert colorspaces of objects on the page.

    Args:
        components: number of color components; valid values are 1, 3
            and 4.
    Raises:
        ValueError: if `components` is not one of 1, 3, 4.
    """
    if components not in (1, 3, 4):
        raise ValueError("components must be one of 1, 3, 4")
    pdf_doc = _as_pdf_document(self.parent)
    raw_options = mupdf.pdf_recolor_options()
    raw_options.num_comp = components
    mupdf.pdf_recolor_page(pdf_doc, self.number, mupdf.PdfRecolorOptions(raw_options))
def clip_to_rect(self, rect):
    """Clip away page content outside the rectangle.

    Args:
        rect: rect-like. It is multiplied with the page's transformation
            matrix before being handed to MuPDF.
    Raises:
        ValueError: if the rectangle is infinite or its intersection with
            the page rectangle is empty.
    """
    area = Rect(rect)
    if area.is_infinite or (area & self.rect).is_empty:
        raise ValueError("rect must not be infinite or empty")
    area *= self.transformation_matrix
    pdf_page = _as_pdf_page(self)
    mupdf.pdf_clip_page(pdf_page, JM_rect_from_py(area))
@property
def artbox(self):
    """The ArtBox.

    Falls back to the CropBox if the page has no ArtBox entry. Otherwise
    the entry's y-coordinates are flipped against the MediaBox's y1.
    """
    raw = self._other_box("ArtBox")
    if raw is None:
        return self.cropbox
    top = self.mediabox.y1
    return Rect(raw[0], top - raw[3], raw[2], top - raw[1])
@property
def bleedbox(self):
    """The BleedBox.

    Falls back to the CropBox if the page has no BleedBox entry. Otherwise
    the entry's y-coordinates are flipped against the MediaBox's y1.
    """
    raw = self._other_box("BleedBox")
    if raw is None:
        return self.cropbox
    top = self.mediabox.y1
    return Rect(raw[0], top - raw[3], raw[2], top - raw[1])
def bound(self):
    """Get page rectangle.

    Returns:
        The page's bounding rectangle as a Rect. If MuPDF reports an
        infinite rectangle for a PDF page, a rectangle with the CropBox's
        dimensions (swapped unless the rotation is 0 or 180) and top-left
        corner (0, 0) is returned instead, and the most recent stored MuPDF
        warning, if any, is shown as a message.
    """
    CheckParent(self)
    page = _as_fz_page(self.this)
    val = Rect(mupdf.fz_bound_page(page))
    if val.is_infinite and self.parent.is_pdf:
        cb = self.cropbox
        w, h = cb.width, cb.height
        if self.rotation not in (0, 180):
            w, h = h, w
        val = Rect(0, 0, w, h)
        # Show the latest warning only if one is stored: indexing the
        # last line of an empty warnings text would raise IndexError.
        stored = TOOLS.mupdf_warnings(reset=False).splitlines()
        if stored:
            message(stored[-1])
    return val
def clean_contents(self, sanitize=1):
    """Filter the page's contents through MuPDF.

    Args:
        sanitize: passed to the filter options. If false and the page is
            not wrapped yet, wrap_contents() is called first.

    Nothing is filtered if this is not a PDF page.
    """
    if not (sanitize or self.is_wrapped):
        self.wrap_contents()
    pdf_page = _as_pdf_page( self.this, required=False)
    if pdf_page.m_internal:
        filter_options = _make_PdfFilterOptions(recurse=1, sanitize=sanitize)
        mupdf.pdf_filter_page_contents( pdf_page.doc(), pdf_page, filter_options)
@property
def cropbox(self):
    """The CropBox.

    For a PDF page this is the result of JM_cropbox(); otherwise the
    page's bounding rectangle. Either is returned as a Rect.
    """
    CheckParent(self)
    pdf_page = self._pdf_page(required=False)
    if pdf_page.m_internal:
        raw = JM_cropbox(pdf_page.obj())
    else:
        raw = mupdf.fz_bound_page(self.this)
    return Rect(raw)
@property
def cropbox_position(self):
    """The `tl` attribute of the CropBox."""
    box = self.cropbox
    return box.tl
def delete_annot(self, annot):
    """Delete annot and return next one.

    Annotations that JM_find_annot_irt() reports for `annot` are deleted
    first. The passed annotation object is erased afterwards.

    Returns:
        An Annot wrapping the annotation that followed `annot`.
    """
    CheckParent(self)
    CheckParent(annot)
    pdf_page = self._pdf_page()
    # first remove all /IRT annotations, one per iteration
    while (reply := JM_find_annot_irt(annot.this)):
        mupdf.pdf_delete_annot(pdf_page, reply.this)
    following = mupdf.pdf_next_annot(annot.this)  # remember the successor
    mupdf.pdf_delete_annot(pdf_page, annot.this)
    successor = Annot(following)
    if successor:
        successor.thisown = True
        successor.parent = weakref.proxy(self)  # the owning page object
        successor.parent._annot_refs[id(successor)] = successor
    annot._erase()
    return successor
def delete_link(self, linkdict):
    """Delete a Link.

    Args:
        linkdict: link dictionary; its "xref" entry identifies the link
            and its "id" entry the Python link object to erase. Anything
            that is not a dict is ignored.
    """
    CheckParent(self)
    if not isinstance( linkdict, dict):
        return  # no dictionary given

    def finished():
        # Erase the Python-side link object, unless the xref is 0.
        if linkdict["xref"] == 0:
            return
        try:
            self._annot_refs[linkdict["id"]]._erase()
        except Exception:
            # Don't print this exception, to match classic. Issue #2841.
            if g_exceptions_verbose > 1:
                exception_info()

    pdf_page = _as_pdf_page(self.this, required=False)
    if not pdf_page.m_internal:
        return finished()   # not a PDF
    xref = linkdict[dictkey_xref]
    if xref < 1:
        return finished()   # invalid xref
    annots = mupdf.pdf_dict_get( pdf_page.obj(), PDF_NAME('Annots'))
    if not annots.m_internal:
        return finished()   # page has no annotations
    count = mupdf.pdf_array_len( annots)
    if count == 0:
        return finished()
    # locate the array entry that refers to the link's xref
    for pos in range( count):
        if mupdf.pdf_to_num( mupdf.pdf_array_get( annots, pos)) == xref:
            break
    else:
        return finished()   # xref not among the annotations
    mupdf.pdf_array_delete( annots, pos)  # remove the array entry
    mupdf.pdf_delete_object( pdf_page.doc(), xref)  # remove the link object
    mupdf.pdf_dict_put( pdf_page.obj(), PDF_NAME('Annots'), annots)
    JM_refresh_links( pdf_page)
    return finished()
@property
def derotation_matrix(self) -> Matrix:
    """Reflects page de-rotation.

    Returns:
        A Matrix: from the optimised implementation if enabled, else from
        JM_derotate_page_matrix() for PDF pages, else one built from
        mupdf.FzRect.UNIT.
    """
    if g_use_extra:
        return Matrix(extra.Page_derotate_matrix( self.this))
    pdf_page = self._pdf_page(required=False)
    if pdf_page.m_internal:
        return Matrix(JM_derotate_page_matrix(pdf_page))
    return Matrix(mupdf.FzRect(mupdf.FzRect.UNIT))
def extend_textpage(self, tpage, flags=0, matrix=None):
    """Run this page through a device writing to an existing text page.

    Args:
        tpage: object whose `.this` is a mupdf.FzStextPage.
        flags: stored in the structured-text options.
        matrix: matrix-like used when running the page.
    """
    stext_page = tpage.this
    assert isinstance( stext_page, mupdf.FzStextPage)
    stext_options = mupdf.FzStextOptions()
    stext_options.flags = flags
    ctm = JM_matrix_from_py(matrix)
    device = mupdf.FzDevice(stext_page, stext_options)
    mupdf.fz_run_page( self.this, device, ctm, mupdf.FzCookie())
    mupdf.fz_close_device( device)
@property
def first_annot(self):
    """First annotation.

    Returns:
        An Annot owned by this page, or None if this is no PDF page or it
        has no annotation.
    """
    CheckParent(self)
    pdf_page = self._pdf_page(required=False)
    if not pdf_page.m_internal:
        return None
    raw = mupdf.pdf_first_annot(pdf_page)
    if not raw.m_internal:
        return None
    first = Annot(raw)
    first.thisown = True
    first.parent = weakref.proxy(self)  # the owning page object
    self._annot_refs[id(first)] = first
    return first
@property
def first_link(self):
    """First link on page, as delivered by load_links()."""
    link = self.load_links()
    return link
@property
def first_widget(self):
    """First widget/field.

    Returns:
        A Widget filled from the page's first widget annotation, or None
        if this is no PDF page or it has no widget.
    """
    CheckParent(self)
    pdf_page = self._pdf_page(required=False)
    if not pdf_page.m_internal:
        return None
    raw = mupdf.pdf_first_widget(pdf_page)
    if not raw.m_internal:
        return None
    carrier = Annot(raw)
    carrier.thisown = True
    carrier.parent = weakref.proxy(self)  # the owning page object
    self._annot_refs[id(carrier)] = carrier
    # the caller receives a Widget populated from the annotation
    field = Widget()
    TOOLS._fill_widget(carrier, field)
    return field
def get_bboxlog(self, layers=None):
    """Run the page through a bbox device and return what it collected.

    The page is processed with rotation 0; the previous rotation is
    restored afterwards, also if processing fails.

    Args:
        layers: if true, the device is created with layer info enabled.
    Returns:
        The list filled by the device created with JM_new_bbox_device().
    """
    CheckParent(self)
    old_rotation = self.rotation
    if old_rotation != 0:
        self.set_rotation(0)
    try:
        rc = []
        dev = JM_new_bbox_device( rc, bool(layers))
        mupdf.fz_run_page( self.this, dev, mupdf.FzMatrix(), mupdf.FzCookie())
        mupdf.fz_close_device( dev)
    finally:
        # Never leave the page with a changed rotation, even on errors.
        if old_rotation != 0:
            self.set_rotation(old_rotation)
    return rc
def get_cdrawings(self, extended=None, callback=None, method=None):
    """Extract vector graphics ("line art") from the page.

    The page is processed with rotation 0; the previous rotation is
    restored afterwards, also if processing fails.

    Args:
        extended: if true, clips are included.
        callback: if callable, it is handed to the line art device in
            place of the result list.
        method: passed to the line art device; if not None, no list is
            returned.
    Returns:
        The list of paths, or None if `callback` is callable or `method`
        is not None.
    """
    CheckParent(self)
    old_rotation = self.rotation
    if old_rotation != 0:
        self.set_rotation(0)
    use_callback = callable(callback) or method is not None
    try:
        page = self.this
        if isinstance(page, mupdf.PdfPage):
            # Downcast pdf_page to fz_page.
            page = mupdf.FzPage(page)
        assert isinstance(page, mupdf.FzPage), f'{self.this=}'
        clips = bool(extended)
        prect = mupdf.fz_bound_page(page)
        if g_use_extra:
            rc = extra.get_cdrawings(page, extended, callback, method)
        else:
            rc = []
            dev = JM_new_lineart_device_Device(
                callback if use_callback else rc, clips, method
            )
            # matrix flipping the y-axis at the page's bottom edge
            dev.ptm = mupdf.FzMatrix(1, 0, 0, -1, 0, prect.y1)
            mupdf.fz_run_page(page, dev, mupdf.FzMatrix(), mupdf.FzCookie())
            mupdf.fz_close_device(dev)
    finally:
        # Never leave the page with a changed rotation, even on errors.
        if old_rotation != 0:
            self.set_rotation(old_rotation)
    if use_callback:
        return None
    return rc
def get_contents(self):
    """Get xrefs of /Contents objects.

    Returns:
        A list of xrefs: one per array entry if /Contents is an array, a
        single one if it is another existing object, else an empty list.
    """
    CheckParent(self)
    page_obj = _as_pdf_page(self.this).obj()
    contents = mupdf.pdf_dict_get(page_obj, mupdf.PDF_ENUM_NAME_Contents)
    if mupdf.pdf_is_array(contents):
        return [
            mupdf.pdf_to_num(mupdf.pdf_array_get(contents, pos))
            for pos in range(mupdf.pdf_array_len(contents))
        ]
    if contents.m_internal:
        return [mupdf.pdf_to_num(contents)]
    return []
def get_displaylist(self, annots=1):
    """Make a DisplayList from the page for Pixmap generation.

    Args:
        annots: include (default) or exclude annotations.
    Returns:
        A DisplayList.
    """
    CheckParent(self)
    builder = (
        mupdf.fz_new_display_list_from_page
        if annots
        else mupdf.fz_new_display_list_from_page_contents
    )
    return DisplayList(builder(self.this))
def get_drawings(self, extended: bool=False) -> list:
    """Retrieve vector graphics. The extended version includes clips.

    Note:
    For greater comfort, this method converts point-likes, rect-likes and
    quad-likes of the C version to respective Point / Rect / Quad objects.
    For fill-only and stroke-only paths, keys missing in the path
    dictionary are added with value None.

    Returns:
        The list delivered by get_cdrawings(), with its path dictionaries
        modified in place.
    """
    default_keys = (
        'closePath',
        'fill',
        'color',
        'width',
        'lineCap',
        'lineJoin',
        'dashes',
        'stroke_opacity',
        'fill_opacity',
        'even_odd',
    )
    paths = self.get_cdrawings(extended=extended)
    for path in paths:
        kind = path["type"]
        if kind.startswith("clip"):
            path["scissor"] = Rect(path["scissor"])
        else:
            path["rect"] = Rect(path["rect"])
        if kind != "group":
            converted = []
            for item in path["items"]:
                cmd = item[0]
                operands = item[1:]
                if cmd == "re":
                    converted.append(("re", Rect(operands[0]).normalize(), operands[1]))
                elif cmd == "qu":
                    converted.append(("qu", Quad(operands[0])))
                else:
                    converted.append((cmd, *[Point(p) for p in operands]))
            path["items"] = converted
        if kind in ('f', 's'):
            # complete the dictionary with None for every missing key
            for key in default_keys:
                path.setdefault(key, None)
    return paths
class Drawpath(object):
    """Reflects a path dictionary from get_cdrawings().

    Every keyword argument becomes an instance attribute of the same name.
    """

    def __init__(self, **args):
        vars(self).update(args)
class Drawpathlist(object):
    """List of Path objects representing get_cdrawings() output.

    Besides the paths, counters per path type are maintained:
    `path_count` (all), `clip_count`, `group_count`, `fill_count`,
    `stroke_count` and `fillstroke_count`.
    """

    # path type -> name of the counter attribute to increment
    _COUNTER_BY_TYPE = {
        "clip": "clip_count",
        "group": "group_count",
        "f": "fill_count",
        "s": "stroke_count",
        "fs": "fillstroke_count",
    }

    def __getitem__(self, item):
        return self.paths.__getitem__(item)

    def __init__(self):
        self.paths = []
        self.path_count = 0
        self.group_count = 0
        self.clip_count = 0
        self.fill_count = 0
        self.stroke_count = 0
        self.fillstroke_count = 0

    def __len__(self):
        return self.paths.__len__()

    def append(self, path):
        """Append a path and update the counters for its type."""
        self.paths.append(path)
        self.path_count += 1
        counter = self._COUNTER_BY_TYPE.get(path.type)
        if counter is not None:
            setattr(self, counter, getattr(self, counter) + 1)

    def _parents(self, i, kind):
        """Return the enclosing paths of type `kind` for path number i.

        Walks backwards from path i and keeps each path of the wanted type
        whose level is smaller than that of the previously kept one
        (starting with the level of path i itself).
        """
        count = self.path_count
        # An empty list has no valid index. Without this check a negative
        # index would never become non-negative below.
        if count == 0 or i >= count:
            raise IndexError("bad path index")
        if i < 0:
            i %= count  # same result as repeatedly adding `count`
        limit = self.paths[i].level
        parents = []
        for p in reversed(self.paths[:i]):
            if p.type != kind or p.level >= limit:
                continue
            if parents and p.level >= parents[-1].level:
                continue  # only accept smaller levels
            parents.append(p)
        return parents

    def clip_parents(self, i):
        """Return list of parent clip paths.

        Args:
            i: (int) return parents of this path.
        Returns:
            List of the clip parents.
        Raises:
            IndexError: if i is not smaller than the number of paths, or
                if there are no paths at all.
        """
        return self._parents(i, "clip")

    def group_parents(self, i):
        """Return list of parent group paths.

        Args:
            i: (int) return parents of this path.
        Returns:
            List of the group parents.
        Raises:
            IndexError: if i is not smaller than the number of paths, or
                if there are no paths at all.
        """
        return self._parents(i, "group")
def get_lineart(self) -> object:
    """Get page drawings paths.

    Note:
    For greater comfort, this method converts point-like, rect-like and
    quad-like tuples of the C version to respective Point / Rect / Quad
    objects. Fill-only paths additionally receive None for the
    stroke-related attributes.
    In contrast to get_drawings(), this output is an object.

    Returns:
        A Drawpathlist of Drawpath objects.
    """
    paths = self.Drawpathlist()
    for path in self.get_cdrawings(extended=True):
        npath = self.Drawpath(**path)
        if npath.type != "clip":
            npath.rect = Rect(path["rect"])
        else:
            npath.scissor = Rect(path["scissor"])
        if npath.type != "group":
            newitems = []
            for item in path["items"]:
                cmd = item[0]
                rest = item[1:]
                if cmd == "re":
                    item = ("re", Rect(rest[0]).normalize(), rest[1])
                elif cmd == "qu":
                    item = ("qu", Quad(rest[0]))
                else:
                    item = tuple([cmd] + [Point(i) for i in rest])
                newitems.append(item)
            npath.items = newitems
        if npath.type == "f":
            npath.stroke_opacity = None
            npath.dashes = None
            npath.line_join = None
            npath.line_cap = None
            npath.color = None
            npath.width = None
            # get_drawings() names these keys 'lineJoin' / 'lineCap'.
            # Provide the same names here too, so that they exist on fill
            # paths as well; the snake_case ones above are kept for
            # compatibility.
            for key in ("lineJoin", "lineCap"):
                if not hasattr(npath, key):
                    setattr(npath, key, None)
        paths.append(npath)
    return paths
def remove_rotation(self):
    """Set page rotation to 0 while maintaining visual appearance.

    A ' cm ' matrix command is prepended to the page contents, the
    mediabox is swapped for 90 / 270 degree rotations, and the rectangles
    of annotations, links and widgets are transformed with the inverse
    of that matrix.

    Returns:
        `Identity` if the page is not rotated, otherwise the inverse of
        the matrix that was prepended to the contents.
    """
    rot = self.rotation  # normalized rotation value
    if rot == 0:
        return  Identity # nothing to do

    # need to derotate the page's content
    mb = self.mediabox  # current mediabox

    if rot == 90:
        # before derotation, shift content horizontally
        mat0 = Matrix(1, 0, 0, 1, mb.y1 - mb.x1 - mb.x0 - mb.y0, 0)
    elif rot == 270:
        # before derotation, shift content vertically
        mat0 = Matrix(1, 0, 0, 1, 0, mb.x1 - mb.y1 - mb.y0 - mb.x0)
    else:  # rot = 180
        mat0 = Matrix(1, 0, 0, 1, -2 * mb.x0, -2 * mb.y0)

    # prefix with derotation matrix
    mat = mat0 * self.derotation_matrix
    cmd = _format_g(tuple(mat)) + ' cm '
    cmd = cmd.encode('utf8')
    _ = TOOLS._insert_contents(self, cmd, False)  # prepend to page contents

    # swap x- and y-coordinates
    if rot in (90, 270):
        x0, y0, x1, y1 = mb
        mb.x0 = y0
        mb.y0 = x0
        mb.x1 = y1
        mb.y1 = x1
        self.set_mediabox(mb)

    self.set_rotation(0)
    # from here on `rot` no longer holds the angle but a matrix
    rot = ~mat  # inverse of the derotation matrix
    for annot in self.annots():  # modify rectangles of annotations
        r = annot.rect * rot
        # TODO: only try to set rectangle for applicable annot types
        annot.set_rect(r)
    for link in self.get_links():  # modify 'from' rectangles of links
        r = link["from"] * rot
        # links are re-created: delete first, then insert with new rect
        self.delete_link(link)
        link["from"] = r
        try:  # invalid links remain deleted
            self.insert_link(link)
        except Exception:
            pass
    for widget in self.widgets():  # modify field rectangles
        r = widget.rect * rot
        widget.rect = r
        widget.update()
    return rot  # the inverse of the generated derotation matrix
def cluster_drawings(
    self, clip=None, drawings=None, x_tolerance: float = 3, y_tolerance: float = 3,
    final_filter: bool = True,
) -> list:
    """Join rectangles of neighboring vector graphic items.

    Args:
        clip: optional rect-like to restrict the page area to consider.
            Only drawings whose rectangle lies completely inside this
            area are used.
        drawings: (optional) output of a previous "get_drawings()".
        x_tolerance: horizontal neighborhood threshold.
        y_tolerance: vertical neighborhood threshold.
        final_filter: if true (default), drop joined rectangles whose
            width or height does not exceed the respective tolerance.
            If false, return all joined rectangles.

    Notes:
        Vector graphics (also called line-art or drawings) usually consist
        of independent items like rectangles, lines or curves to jointly
        form table grid lines or bar, line, pie charts and similar.
        This method identifies rectangles wrapping these disparate items.

    Returns:
        A list of Rect items, each wrapping line-art items that are close
        enough to be considered forming a common vector graphic.
        Only "significant" rectangles will be returned, i.e. having both,
        width and height larger than the tolerance values (unless
        `final_filter` is false).
    """
    CheckParent(self)
    parea = self.rect  # the default clipping area
    if clip is not None:
        parea = Rect(clip)
    delta_x = x_tolerance  # shorter local name
    delta_y = y_tolerance  # shorter local name
    if drawings is None:  # if we cannot re-use a previous output
        drawings = self.get_drawings()

    def are_neighbors(r1, r2):
        """Detect whether r1, r2 are "neighbors".

        Items r1, r2 are called neighbors if the minimum distance between
        their points is less-equal delta.

        Both parameters must be (potentially invalid) rectangles.
        """
        # normalize rectangles as needed
        rr1_x0, rr1_x1 = (r1.x0, r1.x1) if r1.x1 > r1.x0 else (r1.x1, r1.x0)
        rr1_y0, rr1_y1 = (r1.y0, r1.y1) if r1.y1 > r1.y0 else (r1.y1, r1.y0)
        rr2_x0, rr2_x1 = (r2.x0, r2.x1) if r2.x1 > r2.x0 else (r2.x1, r2.x0)
        rr2_y0, rr2_y1 = (r2.y0, r2.y1) if r2.y1 > r2.y0 else (r2.y1, r2.y0)
        if (
            0
            or rr1_x1 < rr2_x0 - delta_x
            or rr1_x0 > rr2_x1 + delta_x
            or rr1_y1 < rr2_y0 - delta_y
            or rr1_y0 > rr2_y1 + delta_y
        ):
            # Rects do not overlap, even when enlarged by the tolerances.
            return False
        else:
            # Rects overlap or are within the tolerances of each other.
            return True

    # exclude graphics not contained in the clip
    paths = [
        p
        for p in drawings
        if 1
        and p["rect"].x0 >= parea.x0
        and p["rect"].x1 <= parea.x1
        and p["rect"].y0 >= parea.y0
        and p["rect"].y1 <= parea.y1
    ]

    # list of all vector graphic rectangles
    prects = sorted([p["rect"] for p in paths], key=lambda r: (r.y1, r.x0))

    new_rects = []  # the final list of the joined rectangles

    # -------------------------------------------------------------------------
    # The strategy is to identify and join all rects that are neighbors
    # -------------------------------------------------------------------------
    while prects:  # the algorithm will empty this list
        r = +prects[0]  # copy of first rectangle
        repeat = True
        # keep scanning until a full pass joins nothing more into r
        while repeat:
            repeat = False
            for i in range(len(prects) - 1, 0, -1):  # from back to front
                if are_neighbors(prects[i], r):
                    r |= prects[i].tl  # include in first rect
                    r |= prects[i].br  # include in first rect
                    del prects[i]  # delete this rect
                    repeat = True

        new_rects.append(r)
        del prects[0]
        # remove duplicates and restore the sort order for the next round
        prects = sorted(set(prects), key=lambda r: (r.y1, r.x0))

    new_rects = sorted(set(new_rects), key=lambda r: (r.y1, r.x0))
    if not final_filter:
        return new_rects
    return [r for r in new_rects if r.width > delta_x and r.height > delta_y]
def get_fonts(self, full=False):
    """Return the owning document's font list for this page.

    Args:
        full: passed through to the document's get_page_fonts().
    """
    CheckParent(self)
    document = self.parent
    return document.get_page_fonts(self.number, full=full)
def get_image_bbox(self, name, transform=0):
    """Get rectangle occupied by image 'name'.

    Args:
        name: either an item of the image list (list / tuple whose last
            entry is an int), or the referencing name string - elem[7]
            of the resp. item.
        transform: if true, also return the image transformation matrix.

    Returns:
        A Rect, or a tuple (Rect, Matrix) if 'transform' is true.
        If no bbox can be determined, the rectangle is
        Rect(1, 1, -1, -1), paired with Matrix() if 'transform' is true.

    Raises:
        ValueError: document closed or encrypted, 'name' is not an item
            of the full image list, or 'name' matches no image or
            multiple images.
    """
    CheckParent(self)
    doc = self.parent
    if doc.is_closed or doc.is_encrypted:
        raise ValueError('document closed or encrypted')

    inf_rect = Rect(1, 1, -1, -1)
    null_mat = Matrix()
    # default result: shaped like a successful result for this 'transform'
    if transform:
        rc = (inf_rect, null_mat)
    else:
        rc = inf_rect

    if type(name) in (list, tuple):
        if not type(name[-1]) is int:
            raise ValueError('need item of full page image list')
        item = name
    else:
        imglist = [i for i in doc.get_page_images(self.number, True) if name == i[7]]
        if len(imglist) == 1:
            item = imglist[0]
        elif imglist == []:
            raise ValueError('bad image name')
        else:
            raise ValueError("found multiple images named '%s'." % name)
    xref = item[-1]
    if xref != 0 or transform:
        try:
            return self.get_image_rects(item, transform=transform)[0]
        except Exception:
            exception_info()
            # Return the prepared default so that callers asking for the
            # matrix still receive a (rect, matrix) tuple on failure.
            return rc
    pdf_page = self._pdf_page()
    val = JM_image_reporter(pdf_page)

    if not bool(val):
        return rc

    for v in val:
        if v[0] != item[-3]:
            continue
        q = Quad(v[1])
        bbox = q.rect
        if transform == 0:
            rc = bbox
            break

        hm = Matrix(util_hor_matrix(q.ll, q.lr))
        h = abs(q.ll - q.ul)
        w = abs(q.ur - q.ul)
        m0 = Matrix(1 / w, 0, 0, 1 / h, 0, 0)
        m = ~(hm * m0)
        rc = (bbox, m)
        break
    return rc
def get_images(self, full=False):
    """Return the owning document's image list for this page.

    Args:
        full: passed through to the document's get_page_images().
    """
    CheckParent(self)
    document = self.parent
    return document.get_page_images(self.number, full=full)
def get_oc_items(self) -> list:
    """Get OCGs and OCMDs used in the page's contents.

    Returns:
        List of items (name, xref, type), where type is one of
        "ocg" / "ocmd", and name is the property name. Properties whose
        object source contains neither marker are left out.
    """
    # checked in this order; the first marker found decides the type
    markers = (("/Type/OCG", "ocg"), ("/Type/OCMD", "ocmd"))
    found = []
    for pname, xref in self._get_resource_properties():
        source = self.parent.xref_object(xref, compressed=True)
        octype = next((label for marker, label in markers if marker in source), None)
        if octype is not None:
            found.append((pname, xref, octype))
    return found
def get_svg_image(self, matrix=None, text_as_path=1):
    """Make SVG image from page.

    Args:
        matrix: transformation applied to the page bounds and the page run.
        text_as_path: 1 selects FZ_SVG_TEXT_AS_PATH, any other value
            selects FZ_SVG_TEXT_AS_TEXT.

    Returns:
        The SVG output converted by JM_EscapeStrFromBuffer().
    """
    CheckParent(self)
    page_bounds = mupdf.fz_bound_page(self.this)
    ctm = JM_matrix_from_py(matrix)
    if text_as_path == 1:
        text_option = mupdf.FZ_SVG_TEXT_AS_PATH
    else:
        text_option = mupdf.FZ_SVG_TEXT_AS_TEXT
    bounds = mupdf.fz_transform_rect(page_bounds, ctm)
    width = bounds.x1 - bounds.x0
    height = bounds.y1 - bounds.y0

    buffer = mupdf.fz_new_buffer(1024)
    output = mupdf.FzOutput(buffer)
    device = mupdf.fz_new_svg_device(output, width, height, text_option, 1)
    mupdf.fz_run_page(self.this, device, ctm, mupdf.FzCookie())
    mupdf.fz_close_device(device)
    output.fz_close_output()
    return JM_EscapeStrFromBuffer(buffer)
def get_textbox(
    page: Page,
    rect: rect_like,
    textpage=None,  #: TextPage = None,
) -> str:
    """Return the result of extractTextbox(rect) for this page.

    Args:
        rect: rect-like passed to the textpage's extractTextbox().
        textpage: textpage to use. If None, a temporary one is created
            from the page.

    Raises:
        ValueError: if the given textpage's parent is not this page.
    """
    if textpage is None:
        # temporary textpage, dropped when this function returns
        return page.get_textpage().extractTextbox(rect)
    if getattr(textpage, "parent") != page:
        raise ValueError("not a textpage of this page")
    return textpage.extractTextbox(rect)
def get_textpage(self, clip: rect_like = None, flags: int = 0, matrix=None) -> "TextPage":
    """Create a TextPage for this page.

    The page rotation is set to 0 while the textpage is created and
    restored afterwards, also if creation fails.

    Args:
        clip: passed to _get_textpage().
        flags: passed to _get_textpage().
        matrix: passed to _get_textpage(); defaults to Matrix(1, 1).
    """
    CheckParent(self)
    if matrix is None:
        matrix = Matrix(1, 1)
    saved_rotation = self.rotation
    was_rotated = saved_rotation != 0
    if was_rotated:
        self.set_rotation(0)
    try:
        raw_textpage = self._get_textpage(clip, flags=flags, matrix=matrix)
    finally:
        if was_rotated:
            self.set_rotation(saved_rotation)
    result = TextPage(raw_textpage)
    result.parent = weakref.proxy(self)
    return result
def get_texttrace(self):
    """Run the page through a text-trace device and return its output.

    The page rotation is set to 0 for the run and restored afterwards,
    also if the run raises.

    Returns:
        The list filled by the text-trace device.
    """
    CheckParent(self)
    old_rotation = self.rotation
    if old_rotation != 0:
        self.set_rotation(0)
    try:
        page = self.this
        rc = []
        if g_use_extra:
            dev = extra.JM_new_texttrace_device(rc)
        else:
            dev = JM_new_texttrace_device(rc)
        prect = mupdf.fz_bound_page(page)
        dev.ptm = mupdf.FzMatrix(1, 0, 0, -1, 0, prect.y1)
        mupdf.fz_run_page(page, dev, mupdf.FzMatrix(), mupdf.FzCookie())
        mupdf.fz_close_device(dev)
    finally:
        # never leave the page de-rotated, mirroring get_textpage()
        if old_rotation != 0:
            self.set_rotation(old_rotation)
    return rc
def get_xobjects(self):
    """Return the owning document's xobject list for this page."""
    CheckParent(self)
    document = self.parent
    return document.get_page_xobjects(self.number)
def insert_font(self, fontname="helv", fontfile=None, fontbuffer=None,
               set_simple=False, wmode=0, encoding=0):
    """Make a font available on this page and return its xref.

    Args:
        fontname: reference name of the font; a leading "/" is removed.
        fontfile: font file given as str, as an object with an
            "absolute" attribute (converted with str()), or as an object
            with a "name" attribute.
        fontbuffer: font data passed on to _insertFont(). Replaced by
            pymupdf_fonts data if the lowercase fontname is a key of
            fitz_fontdescriptors.
        set_simple, wmode, encoding: passed on to _insertFont().

    Returns:
        The font's xref, or the false value returned by _insertFont().

    Raises:
        ValueError: page has no parent, fontname contains invalid
            characters, or fontfile has an unsupported type.
    """
    doc = self.parent
    if doc is None:
        raise ValueError("orphaned object: parent is None")
    idx = 0

    if fontname.startswith("/"):
        fontname = fontname[1:]
    inv_chars = INVALID_NAME_CHARS.intersection(fontname)
    if inv_chars != set():
        raise ValueError(f"bad fontname chars {inv_chars}")

    font = CheckFont(self, fontname)
    if font is not None:                    # font already in font list of page
        xref = font[0]                      # this is the xref
        if CheckFontInfo(doc, xref):        # also in our document font list?
            return xref                     # yes: we are done
        # need to build the doc FontInfo entry - done via get_char_widths
        doc.get_char_widths(xref)
        return xref

    #--------------------------------------------------------------------------
    # the font is not present for this page
    #--------------------------------------------------------------------------

    bfname = Base14_fontdict.get(fontname.lower(), None) # BaseFont if Base-14 font

    serif = 0
    CJK_number = -1
    CJK_list_n = ["china-t", "china-s", "japan", "korea"]
    CJK_list_s = ["china-ts", "china-ss", "japan-s", "korea-s"]

    # CJK_number becomes the position of fontname in one of the lists
    # above, or stays -1 if it is in neither.
    try:
        CJK_number = CJK_list_n.index(fontname)
        serif = 0
    except Exception:
        # Verbose in PyMuPDF/tests.
        if g_exceptions_verbose > 1:    exception_info()
        pass

    if CJK_number < 0:
        try:
            CJK_number = CJK_list_s.index(fontname)
            serif = 1
        except Exception:
            # Verbose in PyMuPDF/tests.
            if g_exceptions_verbose > 1:    exception_info()
            pass

    if fontname.lower() in fitz_fontdescriptors.keys():
        import pymupdf_fonts
        fontbuffer = pymupdf_fonts.myfont(fontname)  # make a copy
        del pymupdf_fonts

    # install the font for the page
    if fontfile is not None:
        if type(fontfile) is str:
            fontfile_str = fontfile
        elif hasattr(fontfile, "absolute"):
            fontfile_str = str(fontfile)
        elif hasattr(fontfile, "name"):
            fontfile_str = fontfile.name
        else:
            raise ValueError("bad fontfile")
    else:
        fontfile_str = None
    val = self._insertFont(fontname, bfname, fontfile_str, fontbuffer, set_simple, idx,
                           wmode, serif, encoding, CJK_number)

    if not val:                   # did not work, error return
        return val

    xref = val[0]                 # xref of installed font
    fontdict = val[1]

    if CheckFontInfo(doc, xref):  # check again: document already has this font
        return xref               # we are done

    # need to create document font info
    doc.get_char_widths(xref, fontdict=fontdict)
    return xref
@property
def is_wrapped(self):
    """True if _count_q_balance() reports (0, 0) for the page's /Contents."""
    balance = self._count_q_balance()
    return balance == (0, 0)
@property
def language(self):
    """Page language.

    Returns:
        The inheritable /Lang entry of the page as a string, or None if
        the page is not a PDF page or has no such entry.
    """
    pdf_page = _as_pdf_page(self.this, required=False)
    if pdf_page.m_internal:
        lang = mupdf.pdf_dict_get_inheritable(pdf_page.obj(), PDF_NAME('Lang'))
        if lang.m_internal:
            return mupdf.pdf_to_str_buf(lang)
    return None
def links(self, kinds=None):
    """Generator over the links of a page.

    Args:
        kinds: (list) link kinds to subselect from. If None,
            all links are returned. E.g. kinds=[LINK_URI]
            will only yield URI links.
    """
    for link in self.get_links():
        if kinds is not None and link["kind"] not in kinds:
            continue
        yield link
def load_annot(self, ident: typing.Union[str, int]) -> Annot:
    """Load an annot by name (/NM key) or xref.

    Args:
        ident: identifier, either name (str) or xref (int).

    Returns:
        The annotation, registered with this page, or the false value
        returned by _load_annot().

    Raises:
        ValueError: if `ident` is neither str nor int.
    """
    CheckParent(self)
    ident_type = type(ident)
    if ident_type is str:
        name, xref = ident, 0
    elif ident_type is int:
        name, xref = None, ident
    else:
        raise ValueError("identifier must be a string or integer")
    annot = self._load_annot(name, xref)
    if annot:
        annot.thisown = True
        annot.parent = weakref.proxy(self)
        self._annot_refs[id(annot)] = annot
    return annot
def load_links(self):
    """Get first Link.

    Returns:
        A Link registered with this page, or None if MuPDF returns no
        links. Its `xref` / `id` are 0 / "" unless the document is a
        PDF and the page has link annotations; then they come from the
        first such annotation.
    """
    CheckParent(self)
    raw_links = mupdf.fz_load_links( self.this)
    if not raw_links.m_internal:
        return
    link = Link( raw_links)
    link.thisown = True
    link.parent = weakref.proxy(self) # owning page object
    self._annot_refs[id(link)] = link
    link.xref, link.id = 0, ""
    if self.parent.is_pdf:
        link_entries = [x for x in self.annot_xrefs() if x[1] == mupdf.PDF_ANNOT_LINK]
        if link_entries:
            first = link_entries[0]
            link.xref = first[0]
            link.id = first[2]
    return link
+#----------------------------------------------------------------
+# page load widget by xref
+#----------------------------------------------------------------
def load_widget( self, xref):
    """Load a widget by its xref.

    Returns:
        A Widget filled from the annotation found for `xref`, or the
        false value returned by JM_get_widget_by_xref().
    """
    CheckParent(self)
    pdf_page = _as_pdf_page(self.this)
    annot = JM_get_widget_by_xref( pdf_page, xref)
    if not annot:
        return annot
    annot.thisown = True
    annot.parent = weakref.proxy(self)
    self._annot_refs[id(annot)] = annot
    widget = Widget()
    TOOLS._fill_widget(annot, widget)
    return widget
@property
def mediabox(self):
    """The MediaBox.

    For a non-PDF page, the bounds reported by fz_bound_page() are used.
    """
    CheckParent(self)
    pdf_page = self._pdf_page(required=False)
    if pdf_page.m_internal:
        return Rect(JM_mediabox( pdf_page.obj()))
    return Rect(mupdf.fz_bound_page( self.this))
@property
def mediabox_size(self):
    """Point made of the mediabox's x1 and y1 coordinates."""
    box = self.mediabox
    return Point(box.x1, box.y1)
+#@property
+#def parent( self):
+#    assert self._parent
+#    if self._parent:
+#        return self._parent
+#    return Document( self.this.document())
def read_contents(self):
    """Return all /Contents streams concatenated to one bytes object."""
    contents = TOOLS._get_all_contents(self)
    return contents
def refresh(self):
    """Refresh page after link/annot/widget updates.

    Replaces `self.this` with the result of the document's reload_page().
    """
    CheckParent(self)
    # fixme this looks wrong.
    self.this = self.parent.reload_page(self)
@property
def rotation(self):
    """Page rotation; 0 if the page is not a PDF page."""
    CheckParent(self)
    pdf_page = _as_pdf_page(self.this, required=0)
    return JM_page_rotation(pdf_page) if pdf_page.m_internal else 0
@property
def rotation_matrix(self) -> Matrix:
    """Matrix reflecting the page rotation, built from TOOLS._rotate_matrix()."""
    raw_matrix = TOOLS._rotate_matrix(self)
    return Matrix(raw_matrix)
def run(self, dw, m):
    """Run page through a device.

    Args:
        dw: DeviceWrapper; its `device` attribute receives the page.
        m: matrix-like, converted with JM_matrix_from_py().
    """
    CheckParent(self)
    page, device = self.this, dw.device
    ctm = JM_matrix_from_py(m)
    mupdf.fz_run_page(page, device, ctm, mupdf.FzCookie())
def set_artbox(self, rect):
    """Set the ArtBox via _set_pagebox() and return its result."""
    boxname = "ArtBox"
    return self._set_pagebox(boxname, rect)
def set_bleedbox(self, rect):
    """Set the BleedBox via _set_pagebox() and return its result."""
    boxname = "BleedBox"
    return self._set_pagebox(boxname, rect)
def set_contents(self, xref):
    """Set object at 'xref' as the page's /Contents.

    Raises:
        ValueError: document closed, not a PDF, xref out of range, or
            the object at xref is not a stream.
    """
    CheckParent(self)
    document = self.parent
    if document.is_closed:
        raise ValueError("document closed")
    if not document.is_pdf:
        raise ValueError("is no PDF")
    valid_xrefs = range(1, document.xref_length())
    if xref not in valid_xrefs:
        raise ValueError("bad xref")
    is_stream = document.xref_is_stream(xref)
    if not is_stream:
        raise ValueError("xref is no stream")
    document.xref_set_key(self.xref, "Contents", "%i 0 R" % xref)
def set_cropbox(self, rect):
    """Set the CropBox via _set_pagebox(). Will also change Page.rect."""
    boxname = "CropBox"
    return self._set_pagebox(boxname, rect)
def set_language(self, language=None):
    """Set PDF page default language.

    Args:
        language: language string. A false value (None, "") removes the
            page's /Lang entry instead.
    """
    CheckParent(self)
    pdfpage = _as_pdf_page(self.this)
    # obj() must be called: the dictionary functions need the page
    # object itself, not the bound method.
    page_obj = pdfpage.obj()
    if not language:
        mupdf.pdf_dict_del(page_obj, PDF_NAME('Lang'))
        return
    lang = mupdf.fz_text_language_from_string(language)
    assert hasattr(mupdf, 'fz_string_from_text_language2')
    mupdf.pdf_dict_put_text_string(
        page_obj,
        PDF_NAME('Lang'),
        mupdf.fz_string_from_text_language2(lang),
    )
def set_mediabox(self, rect):
    """Set the MediaBox.

    Also removes the page's CropBox, ArtBox, BleedBox and TrimBox entries.

    Raises:
        ValueError: if `rect` is empty or infinite.
    """
    CheckParent(self)
    page = self._pdf_page()
    mediabox = JM_rect_from_py(rect)
    if mupdf.fz_is_empty_rect(mediabox) or mupdf.fz_is_infinite_rect(mediabox):
        raise ValueError( MSG_BAD_RECT)
    mupdf.pdf_dict_put_rect( page.obj(), PDF_NAME('MediaBox'), mediabox)
    for boxname in ('CropBox', 'ArtBox', 'BleedBox', 'TrimBox'):
        mupdf.pdf_dict_del( page.obj(), PDF_NAME(boxname))
def set_rotation(self, rotation):
    """Set page rotation.

    The value is passed through JM_norm_rotation() and stored as the
    page's /Rotate entry.
    """
    CheckParent(self)
    pdf_page = _as_pdf_page(self.this)
    normalized = JM_norm_rotation(rotation)
    mupdf.pdf_dict_put_int( pdf_page.obj(), PDF_NAME('Rotate'), normalized)
def set_trimbox(self, rect):
    """Set the TrimBox via _set_pagebox() and return its result."""
    boxname = "TrimBox"
    return self._set_pagebox(boxname, rect)
@property
def transformation_matrix(self):
    """Page transformation matrix.

    For a non-PDF page the converted default FzMatrix is returned. For
    a PDF page with rotation, a y-flip by the cropbox height is returned
    instead of the matrix computed by pdf_page_transform().
    """
    CheckParent(self)

    ctm = mupdf.FzMatrix()
    pdf_page = self._pdf_page(required=False)
    if not pdf_page.m_internal:
        return JM_py_from_matrix(ctm)
    mediabox = mupdf.FzRect(mupdf.FzRect.Fixed_UNIT)    # fixme: original code passed mediabox=NULL.
    mupdf.pdf_page_transform(pdf_page, mediabox, ctm)
    computed = JM_py_from_matrix(ctm)

    if self.rotation % 360 != 0:
        return Matrix(1, 0, 0, -1, 0, self.cropbox.height)
    return Matrix(computed)
@property
def trimbox(self):
    """The TrimBox; falls back to the cropbox if the page has none."""
    box = self._other_box("TrimBox")
    if box is None:
        return self.cropbox
    # y-coordinates are mirrored against the mediabox's y1
    top = self.mediabox.y1
    return Rect(box[0], top - box[3], box[2], top - box[1])
def widgets(self, types=None):
    """Generator over the widgets of a page.

    Args:
        types: (list) field types to subselect from. If None,
            all fields are returned. E.g. types=[PDF_WIDGET_TYPE_TEXT]
            will only yield text fields.
    """
    # collect the xrefs first, then load the widgets one by one
    widget_xrefs = [
        entry[0]
        for entry in self.annot_xrefs()
        if entry[1] == mupdf.PDF_ANNOT_WIDGET
    ]
    for xref in widget_xrefs:
        widget = self.load_widget(xref)
        if types is not None and widget.field_type not in types:
            continue
        yield widget
def wrap_contents(self):
    """Ensure page is in a balanced graphics state.

    Prepends / appends as many "q" / "Q" commands to the page contents
    as _count_q_balance() reports missing.
    """
    missing_push, missing_pop = self._count_q_balance()
    if missing_push > 0:
        TOOLS._insert_contents(self, b"q\n" * missing_push, False)  # prepend
    if missing_pop > 0:
        TOOLS._insert_contents(self, b"\nQ" * missing_pop + b"\n", True)  # append
@property
def xref(self):
    """PDF xref number of page, as reported by the owning document."""
    CheckParent(self)
    document = self.parent
    return document.page_xref(self.number)
# Expose bound() as the attribute `rect`; only a getter is given, so the
# attribute cannot be assigned to.
rect = property(bound, doc="page rectangle")
+class Pixmap:
def __init__(self, *args):
    """
    Pixmap(colorspace, irect, alpha) - empty pixmap.
    Pixmap(colorspace, src) - copy changing colorspace.
    Pixmap(src, mask) - add a mask to a pixmap.
    Pixmap(src, width, height,[clip]) - scaled copy, float dimensions.
    Pixmap(src, alpha=1) - copy and add or drop alpha channel.
    Pixmap(filename) - from an image in a file.
    Pixmap(image) - from an image in memory (bytes).
    Pixmap(colorspace, width, height, samples, alpha) - from samples data.
    Pixmap(PDFdoc, xref) - from an image at xref in a PDF document.
    Pixmap('raw', pixmap) - use the given pixmap directly.

    The argument patterns are tried in the order of the branches below;
    the first match wins. Unrecognised arguments raise Exception.
    """
    # Cache for property `self.samples_mv`. Set here so __del__() sees it if
    # we raise.
    #
    self._samples_mv = None

    # 2024-01-16: Experimental support for a memory-view of the underlying
    # data.  Doesn't seem to make much difference to Pixmap.set_pixel() so
    # not currently used.
    self._memory_view = None

    # `if 0: pass` only exists so that every real case is an `elif`.
    if 0:
        pass

    elif args_match(args,
            (Colorspace, mupdf.FzColorspace),
            (mupdf.FzRect, mupdf.FzIrect, IRect, Rect, tuple)
            ):
        # create empty pixmap with colorspace and IRect (no alpha)
        cs, rect = args
        alpha = 0
        pm = mupdf.fz_new_pixmap_with_bbox(cs, JM_irect_from_py(rect), mupdf.FzSeparations(0), alpha)
        self.this = pm

    elif args_match(args,
            (Colorspace, mupdf.FzColorspace),
            (mupdf.FzRect, mupdf.FzIrect, IRect, Rect, tuple),
            (int, bool)
            ):
        # create empty pixmap with colorspace, IRect and given alpha
        cs, rect, alpha = args
        pm = mupdf.fz_new_pixmap_with_bbox(cs, JM_irect_from_py(rect), mupdf.FzSeparations(0), alpha)
        self.this = pm

    elif args_match(args, (Colorspace, mupdf.FzColorspace, type(None)), (Pixmap, mupdf.FzPixmap)):
        # copy pixmap, converting colorspace
        cs, spix = args
        if isinstance(cs, Colorspace):
            cs = cs.this
        elif cs is None:
            cs = mupdf.FzColorspace(None)
        if isinstance(spix, Pixmap):
            spix = spix.this
        if not mupdf.fz_pixmap_colorspace(spix).m_internal:
            raise ValueError( "source colorspace must not be None")

        if cs.m_internal:
            self.this = mupdf.fz_convert_pixmap(
                    spix,
                    cs,
                    mupdf.FzColorspace(),
                    mupdf.FzDefaultColorspaces(None),
                    mupdf.FzColorParams(),
                    1
                    )
        else:
            # no target colorspace: make a pixmap from the alpha channel
            self.this = mupdf.fz_new_pixmap_from_alpha_channel( spix)
            if not self.this.m_internal:
                raise RuntimeError( MSG_PIX_NOALPHA)

    elif args_match(args, (Pixmap, mupdf.FzPixmap), (Pixmap, mupdf.FzPixmap)):
        # add mask to a pixmap w/o alpha channel
        spix, mpix = args
        if isinstance(spix, Pixmap):
            spix = spix.this
        if isinstance(mpix, Pixmap):
            mpix = mpix.this
        spm = spix
        mpm = mpix
        if not spix.m_internal: # intercept NULL for spix: make alpha only pix
            dst = mupdf.fz_new_pixmap_from_alpha_channel(mpm)
            if not dst.m_internal:
                raise RuntimeError( MSG_PIX_NOALPHA)
        else:
            dst = mupdf.fz_new_pixmap_from_color_and_mask(spm, mpm)
        self.this = dst

    elif (args_match(args, (Pixmap, mupdf.FzPixmap), (float, int), (float, int), None) or
         args_match(args, (Pixmap, mupdf.FzPixmap), (float, int), (float, int))):
        # create pixmap as scaled copy of another one
        if len(args) == 3:
            spix, w, h = args
            bbox = mupdf.FzIrect(mupdf.fz_infinite_irect)
        else:
            spix, w, h, clip = args
            bbox = JM_irect_from_py(clip)

        src_pix = spix.this if isinstance(spix, Pixmap) else spix
        if not mupdf.fz_is_infinite_irect(bbox):
            pm = mupdf.fz_scale_pixmap(src_pix, src_pix.x(), src_pix.y(), w, h, bbox)
        else:
            pm = mupdf.fz_scale_pixmap(src_pix, src_pix.x(), src_pix.y(), w, h, mupdf.FzIrect(mupdf.fz_infinite_irect))
        self.this = pm

    elif args_match(args, str, (Pixmap, mupdf.FzPixmap)) and args[0] == 'raw':
        # Special raw construction where we set .this directly.
        _, pm = args
        if isinstance(pm, Pixmap):
            pm = pm.this
        self.this = pm

    elif args_match(args, (Pixmap, mupdf.FzPixmap), (int, None)):
        # Pixmap(struct Pixmap *spix, int alpha=1)
        # copy pixmap & add / drop the alpha channel
        spix = args[0]
        alpha = args[1] if len(args) == 2 else 1
        src_pix = spix.this if isinstance(spix, Pixmap) else spix
        if not _INRANGE(alpha, 0, 1):
            raise ValueError( "bad alpha value")
        cs = mupdf.fz_pixmap_colorspace(src_pix)
        if not cs.m_internal and not alpha:
            raise ValueError( "cannot drop alpha for 'NULL' colorspace")
        seps = mupdf.FzSeparations()
        n = mupdf.fz_pixmap_colorants(src_pix)
        w = mupdf.fz_pixmap_width(src_pix)
        h = mupdf.fz_pixmap_height(src_pix)
        pm = mupdf.fz_new_pixmap(cs, w, h, seps, alpha)
        # carry over position and resolution of the source
        pm.m_internal.x = src_pix.m_internal.x
        pm.m_internal.y = src_pix.m_internal.y
        pm.m_internal.xres = src_pix.m_internal.xres
        pm.m_internal.yres = src_pix.m_internal.yres

        # copy samples data ------------------------------------------
        # Only the first branch runs (`if 1`); the other two are kept
        # as slower reference implementations.
        if 1:
            # We use our pixmap_copy() to get best performance.
            # test_pixmap.py:test_setalpha(): 3.9s t=0.0062
            extra.pixmap_copy( pm.m_internal, src_pix.m_internal, n)
        elif 1:
            # Use memoryview.
            # test_pixmap.py:test_setalpha(): 4.6 t=0.51
            src_view = mupdf.fz_pixmap_samples_memoryview( src_pix)
            pm_view = mupdf.fz_pixmap_samples_memoryview( pm)
            if src_pix.alpha() == pm.alpha():   # identical samples
                #memcpy(tptr, sptr, w * h * (n + alpha));
                size = w * h * (n + alpha)
                pm_view[ 0 : size] = src_view[ 0 : size]
            else:
                tptr = 0
                sptr = 0
                # This is a little faster than calling
                # pm.fz_samples_set(), but still quite slow. E.g. reduces
                # test_pixmap.py:test_setalpha() from 6.7s to 4.5s.
                #
                # t=0.53
                pm_stride = pm.stride()
                pm_n = pm.n()
                pm_alpha = pm.alpha()
                src_stride = src_pix.stride()
                src_n = src_pix.n()
                #log( '{=pm_stride pm_n src_stride src_n}')
                for y in range( h):
                    for x in range( w):
                        pm_i = pm_stride * y + pm_n * x
                        src_i = src_stride * y + src_n * x
                        pm_view[ pm_i : pm_i + n] = src_view[ src_i : src_i + n]
                        if pm_alpha:
                            pm_view[ pm_i + n] = 255
        else:
            # Copy individual bytes from Python. Very slow.
            # test_pixmap.py:test_setalpha(): 6.89 t=2.601
            if src_pix.alpha() == pm.alpha():   # identical samples
                #memcpy(tptr, sptr, w * h * (n + alpha));
                for i in range(w * h * (n + alpha)):
                    mupdf.fz_samples_set(pm, i, mupdf.fz_samples_get(src_pix, i))
            else:
                # t=2.56
                tptr = 0
                sptr = 0
                src_pix_alpha = src_pix.alpha()
                for i in range(w * h):
                    #memcpy(tptr, sptr, n);
                    for j in range(n):
                        mupdf.fz_samples_set(pm, tptr + j, mupdf.fz_samples_get(src_pix, sptr + j))
                    tptr += n
                    if pm.alpha():
                        mupdf.fz_samples_set(pm, tptr, 255)
                        tptr += 1
                    sptr += n + src_pix_alpha
        self.this = pm

    elif args_match(args, (mupdf.FzColorspace, Colorspace), int, int, None, (int, bool)):
        # create pixmap from samples data
        cs, w, h, samples, alpha = args
        if isinstance(cs, Colorspace):
            cs = cs.this
            assert isinstance(cs, mupdf.FzColorspace)
        n = mupdf.fz_colorspace_n(cs)
        stride = (n + alpha) * w
        seps = mupdf.FzSeparations()
        pm = mupdf.fz_new_pixmap(cs, w, h, seps, alpha)

        if isinstance( samples, (bytes, bytearray)):
            #log('using mupdf.python_buffer_data()')
            samples2 = mupdf.python_buffer_data(samples)
            size = len(samples)
        else:
            res = JM_BufferFromBytes(samples)
            if not res.m_internal:
                raise ValueError( "bad samples data")
            size, c = mupdf.fz_buffer_storage(res)
            # NOTE(review): this reads from `samples`, not from `res` - confirm intended.
            samples2 = mupdf.python_buffer_data(samples) # raw swig proxy for `const unsigned char*`.
        # the samples must exactly fill h rows of `stride` bytes
        if stride * h != size:
            raise ValueError( f"bad samples length {w=} {h=} {alpha=} {n=} {stride=} {size=}")
        mupdf.ll_fz_pixmap_copy_raw( pm.m_internal, samples2)
        self.this = pm

    elif args_match(args, None):
        # create pixmap from filename, file object, pathlib.Path or memory
        imagedata, = args
        name = 'name'
        # NOTE(review): if `fname` is empty in the first two branches,
        # `img` stays unbound and the code below fails - confirm.
        if hasattr(imagedata, "resolve"):
            fname = imagedata.__str__()
            if fname:
                img = mupdf.fz_new_image_from_file(fname)
        elif hasattr(imagedata, name):
            fname = imagedata.name
            if fname:
                img = mupdf.fz_new_image_from_file(fname)
        elif isinstance(imagedata, str):
            img = mupdf.fz_new_image_from_file(imagedata)
        else:
            res = JM_BufferFromBytes(imagedata)
            if not res.m_internal or not res.m_internal.len:
                raise ValueError( "bad image data")
            img = mupdf.fz_new_image_from_buffer(res)

        # Original code passed null for subarea and ctm, but that's not
        # possible with MuPDF's python bindings. The equivalent is an
        # infinite rect and identity matrix scaled by img.w() and img.h().
        pm, w, h = mupdf.fz_get_pixmap_from_image(
                img,
                mupdf.FzIrect(FZ_MIN_INF_RECT, FZ_MIN_INF_RECT, FZ_MAX_INF_RECT, FZ_MAX_INF_RECT),
                mupdf.FzMatrix( img.w(), 0, 0, img.h(), 0, 0),
                )
        xres, yres = mupdf.fz_image_resolution(img)
        pm.m_internal.xres = xres
        pm.m_internal.yres = yres
        self.this = pm

    elif args_match(args, (Document, mupdf.FzDocument), int):
        # Create pixmap from PDF image identified by XREF number
        doc, xref = args
        pdf = _as_pdf_document(doc)
        xreflen = mupdf.pdf_xref_len(pdf)
        if not _INRANGE(xref, 1, xreflen-1):
            raise ValueError( MSG_BAD_XREF)
        ref = mupdf.pdf_new_indirect(pdf, xref, 0)
        type_ = mupdf.pdf_dict_get(ref, PDF_NAME('Subtype'))
        # accepted /Subtype values: Image, Alpha, Luminosity
        if (not mupdf.pdf_name_eq(type_, PDF_NAME('Image'))
                and not mupdf.pdf_name_eq(type_, PDF_NAME('Alpha'))
                and not mupdf.pdf_name_eq(type_, PDF_NAME('Luminosity'))
                ):
            raise ValueError( MSG_IS_NO_IMAGE)
        img = mupdf.pdf_load_image(pdf, ref)
        # Original code passed null for subarea and ctm, but that's not
        # possible with MuPDF's python bindings. The equivalent is an
        # infinite rect and identity matrix scaled by img.w() and img.h().
        pix, w, h = mupdf.fz_get_pixmap_from_image(
                img,
                mupdf.FzIrect(FZ_MIN_INF_RECT, FZ_MIN_INF_RECT, FZ_MAX_INF_RECT, FZ_MAX_INF_RECT),
                mupdf.FzMatrix(img.w(), 0, 0, img.h(), 0, 0),
                )
        self.this = pix

    else:
        # no pattern matched: report the types and values received
        text = 'Unrecognised args for constructing Pixmap:\n'
        for arg in args:
            text += f'    {type(arg)}: {arg}\n'
        raise Exception( text)
def __len__(self):
    """Return the pixmap's `size` attribute."""
    length = self.size
    return length
def __repr__(self):
    """Return "Pixmap(<colorspace name>, <irect>, <alpha>)".

    The colorspace name is 'None' if the pixmap has no colorspace.
    For subclasses of Pixmap, nothing is returned.
    """
    if type(self) is not Pixmap:
        return
    if self.colorspace:
        csname = self.colorspace.this.m_internal.name
    else:
        csname = 'None'
    return "Pixmap(%s, %s, %s)" % (csname, self.irect, self.alpha)
def _tobytes(self, format_, jpg_quality):
    '''
    Pixmap._tobytes

    Write the pixmap to a memory buffer in the format selected by
    `format_`: 1 png, 2 pnm, 3 pam, 5 psd, 6 ps, 7 jpeg (using
    `jpg_quality`); any other value gives png.
    Returns the result of JM_BinFromBuffer() on that buffer.
    '''
    pm = self.this
    size = mupdf.fz_pixmap_stride(pm) * pm.h()
    res = mupdf.fz_new_buffer(size)
    out = mupdf.FzOutput(res)
    if format_ == 7:
        mupdf.fz_write_pixmap_as_jpeg(out, pm, jpg_quality, 0)
    else:
        formats = ((1, "png"), (2, "pnm"), (3, "pam"), (5, "psd"), (6, "ps"))
        suffix = next((name for code, name in formats if format_ == code), "png")
        writer = getattr(mupdf, "fz_write_pixmap_as_" + suffix)
        writer(out, pm)
    out.fz_close_output()
    return JM_BinFromBuffer(res)
def _writeIMG(self, filename, format_, jpg_quality):
    """Save the pixmap to `filename` in the format selected by `format_`.

    Format codes: 1 png, 2 pnm, 3 pam, 5 psd, 6 ps, 7 jpeg (using
    `jpg_quality`); any other value gives png.
    """
    pm = self.this
    if format_ == 7:
        mupdf.fz_save_pixmap_as_jpeg(pm, filename, jpg_quality)
        return
    formats = ((1, "png"), (2, "pnm"), (3, "pam"), (5, "psd"), (6, "ps"))
    suffix = next((name for code, name in formats if format_ == code), "png")
    saver = getattr(mupdf, "fz_save_pixmap_as_" + suffix)
    saver(pm, filename)
@property
def alpha(self):
    """Indicates presence of alpha channel (value of fz_pixmap_alpha())."""
    pm = self.this
    return mupdf.fz_pixmap_alpha(pm)
def clear_with(self, value=None, bbox=None):
    """Fill all color components with same value.

    Args:
        value: the fill value. If None, fz_clear_pixmap() is used.
        bbox: if given (and `value` is not None), only this rectangle
            is filled.
    """
    pm = self.this
    if value is None:
        mupdf.fz_clear_pixmap(pm)
        return
    if bbox is None:
        mupdf.fz_clear_pixmap_with_value(pm, value)
        return
    JM_clear_pixmap_rect_with_value(pm, value, JM_irect_from_py(bbox))
def color_count(self, colors=0, clip=None):
    '''
    Return count of each color.

    If `colors` is false, only the number of entries of the
    JM_color_count() result is returned, otherwise the result itself.
    '''
    counts = JM_color_count( self.this, clip)
    return counts if colors else len( counts)
def color_topusage(self, clip=None):
    """Return most frequent color and its usage ratio.

    Returns:
        Tuple (ratio, pixel). If no pixels were counted, the result is
        (1, bytes of 255 with length self.n).
    """
    if clip is not None and self.irect in Rect(clip):
        clip = self.irect
    histogram = self.color_count(colors=True, clip=clip)
    total = sum(histogram.values())
    if not total:
        return (1, bytes([255] * self.n))
    # max() keeps the first entry among equal counts
    top_pixel, top_count = max(histogram.items(), key=lambda entry: entry[1])
    return (top_count / total, top_pixel)
@property
def colorspace(self):
    """Colorspace of the pixmap, or None if its name is "None"."""
    wrapped = Colorspace(mupdf.fz_pixmap_colorspace(self.this))
    return None if wrapped.name == "None" else wrapped
def copy(self, src, bbox):
    """Copy the area `bbox` of pixmap `src` into this pixmap.

    Raises:
        ValueError: if the colorspace check on `src` fails or the alpha
            values of source and target differ.
    """
    target = self.this
    source = src.this
    if not mupdf.fz_pixmap_colorspace(source):
        raise ValueError( "cannot copy pixmap with NULL colorspace")
    if target.alpha() != source.alpha():
        raise ValueError( "source and target alpha must be equal")
    area = JM_irect_from_py(bbox)
    default_colorspaces = mupdf.FzDefaultColorspaces(None)
    mupdf.fz_copy_pixmap_rect(target, source, area, default_colorspaces)
@property
def digest(self):
    """MD5 digest of the pixmap, returned as a bytes object."""
    return bytes(mupdf.fz_md5_pixmap2(self.this))
def gamma_with(self, gamma):
    """Apply a gamma correction with the float `gamma` (1 is a no-op).

    If the colorspace check fails, only a warning is issued.
    """
    pixmap = self.this
    if mupdf.fz_pixmap_colorspace( pixmap):
        mupdf.fz_gamma_pixmap( pixmap, gamma)
    else:
        message_warning("colorspace invalid for function")
@property
def h(self):
    """Pixmap height as reported by `fz_pixmap_height()`."""
    height = mupdf.fz_pixmap_height(self.this)
    return height
def invert_irect(self, bbox=None):
    """Invert the colors inside `bbox`, or everywhere for an infinite rect.

    Returns:
        False (after a warning) if the pixmap has no colorspace,
        True otherwise.
    """
    pixmap = self.this
    if not mupdf.fz_pixmap_colorspace(pixmap).m_internal:
        message_warning("ignored for stencil pixmap")
        return False
    area = JM_irect_from_py(bbox)
    if mupdf.fz_is_infinite_irect(area):
        mupdf.fz_invert_pixmap(pixmap)
    else:
        mupdf.fz_invert_pixmap_rect(pixmap, area)
    return True
@property
def irect(self):
    """Bounding box of the pixmap, converted by `JM_py_from_irect()`."""
    return JM_py_from_irect( mupdf.fz_pixmap_bbox(self.this))
@property
def is_monochrome(self):
    """Result of MuPDF's monochrome check for this pixmap."""
    monochrome = mupdf.fz_is_pixmap_monochrome( self.this)
    return monochrome
@property
def is_unicolor(self):
    '''
    Check if pixmap has only one color.

    Every pixel (a group of `n` consecutive samples) is compared with
    the first one. A pixmap without samples counts as unicolor.
    '''
    pixmap = self.this
    n = pixmap.n()
    total = pixmap.w() * pixmap.h() * n

    def read_pixel(offset):
        # The n samples of the pixel starting at `offset`.
        return [mupdf.fz_samples_get(pixmap, offset + k) for k in range(n)]

    offsets = iter(range( 0, total, n))
    first = next(offsets, None)
    if first is None:
        return True
    reference = read_pixel(first)
    return all(read_pixel(offset) == reference for offset in offsets)
@property
def n(self):
    """The size of one pixel."""
    if not g_use_extra:
        return mupdf.fz_pixmap_components(self.this)

    # Setting self.__class__.n gives a small reduction in overhead of
    # test_general.py:test_2093, e.g. 1.4x -> 1.3x.
    def _n_from_extra(self):
        return extra.pixmap_n(self.this)

    # Replace this property on the class by the direct version, then
    # answer the current request through it.
    self.__class__.n = property(_n_from_extra)
    return self.n
def pdfocr_save(self, filename, compress=1, language=None, tessdata=None):
    '''
    Save pixmap as an OCR-ed PDF page.

    Args:
        filename: a str is passed to MuPDF as a file name; anything else
            is wrapped by `JM_new_output_fileptr()` and written to.
        compress: stored in the OCR options.
        language: if true, set as the OCR language.
        tessdata: resolved via `get_tessdata()`; if the result is true,
            set as the OCR data directory.
    '''
    tessdata = get_tessdata(tessdata)
    options = mupdf.FzPdfocrOptions()
    options.compress = compress
    if language:
        options.language_set2( language)
    if tessdata:
        options.datadir_set2( tessdata)
    pixmap = self.this
    if isinstance(filename, str):
        mupdf.fz_save_pixmap_as_pdfocr( pixmap, filename, 0, options)
        return
    output = JM_new_output_fileptr( filename)
    try:
        mupdf.fz_write_pixmap_as_pdfocr( output, pixmap, options)
    finally:
        output.fz_close_output()   # Avoid MuPDF warning.
def pdfocr_tobytes(self, compress=True, language="eng", tessdata=None):
    """Return the pixmap as an OCR-ed PDF page in memory.

    Args:
        compress: (bool) compress, default 1 (True).
        language: (str) language(s) occurring on page, default "eng" (English),
            multiples like "eng+ger" for English and German.
        tessdata: (str) folder name of Tesseract's language support. If None
            we use environment variable TESSDATA_PREFIX or search for
            Tesseract installation.
    Returns:
        The bytes written by `pdfocr_save()` into an in-memory stream.
    Notes:
        On failure, make sure Tesseract is installed and you have set
        <tessdata> or environment variable "TESSDATA_PREFIX" to the folder
        containing your Tesseract's language support data.
    """
    tessdata = get_tessdata(tessdata)
    stream = io.BytesIO()
    self.pdfocr_save(stream, compress=compress, language=language, tessdata=tessdata)
    return stream.getvalue()
def pil_image(self):
    """Create a Pillow Image from the Pixmap.

    The Pillow mode is "L" without colorspace, "L"/"LA" for one color
    component, "RGB"/"RGBA" for three (the "A" variant when the pixmap
    has alpha) and "CMYK" otherwise.

    Raises:
        ImportError: if Pillow cannot be imported (a message is issued
            first).
    """
    try:
        from PIL import Image
    except ImportError:
        message("PIL/Pillow not installed")
        raise

    colorspace = self.colorspace
    if not colorspace:
        mode = "L"
    elif colorspace.n == 1:
        mode = "LA" if self.alpha else "L"
    elif colorspace.n == 3:
        mode = "RGBA" if self.alpha else "RGB"
    else:
        mode = "CMYK"
    return Image.frombytes(mode, (self.width, self.height), self.samples)
def pil_save(self, *args, **kwargs):
    """Write to an image file using Pillow.

    The pixmap is converted via `pil_image()` and all positional and
    keyword arguments are passed to the image's "save" method; see the
    Pillow documentation for their meaning. Unless the caller supplies
    "dpi", it is set to (self.xres, self.yres).
    Use this when other output formats are desired.
    """
    image = self.pil_image()
    if "dpi" not in kwargs:
        kwargs["dpi"] = (self.xres, self.yres)
    image.save(*args, **kwargs)
def pil_tobytes(self, *args, **kwargs):
    """Convert to an in-memory image using Pillow.

    The pixmap is converted via `pil_image()` and saved into a bytes
    stream; positional and keyword arguments are passed to the image's
    "save" method (see the Pillow documentation). Unless the caller
    supplies "dpi", it is set to (self.xres, self.yres).
    Use this when other output formats are desired.

    Returns:
        The content of the stream as bytes.
    """
    stream = io.BytesIO()
    image = self.pil_image()
    if "dpi" not in kwargs:
        kwargs["dpi"] = (self.xres, self.yres)
    image.save(stream, *args, **kwargs)
    return stream.getvalue()
def pixel(self, x, y):
    """Return the samples of pixel (x, y) as a tuple.

    Last item is the alpha if Pixmap.alpha is true.
    Coordinates outside the pixmap are reported via `RAISEPY()`.
    """
    raw = self.this.m_internal
    if g_use_extra:
        return extra.pixmap_pixel(raw, x, y)
    if x < 0 or x >= raw.w or y < 0 or y >= raw.h:
        RAISEPY(MSG_PIXEL_OUTSIDE, PyExc_ValueError)
    size = raw.n
    start = raw.stride * y + size * x
    return tuple( self.samples_mv[ start: start + size])
@property
def samples(self)->bytes:
    """A bytes copy of the pixmap's samples memoryview."""
    return bytes( self.samples_mv)
@property
def samples_mv(self):
    '''
    Pixmap samples memoryview.

    Created on first access and cached in `self._samples_mv`.
    '''
    # We remember the returned memoryview so that our `__del__()` can
    # release it; otherwise accessing it after we have been destructed will
    # fail, possibly crashing Python; this is #4155.
    #
    cached = self._samples_mv
    if cached is None:
        cached = self._samples_mv = mupdf.fz_pixmap_samples_memoryview(self.this)
    return cached
def _samples_mv_release(self):
    """Release the cached samples memoryview, if any, and forget it.

    The cache slot is reset to None so that the `samples_mv` property
    creates a fresh view on next access instead of handing out a
    released one. The slot is compared with None rather than tested for
    truth because the truth value of a released memoryview raises
    ValueError and that of an empty one is false.
    """
    view = self._samples_mv
    if view is not None:
        self._samples_mv = None
        view.release()
@property
def samples_ptr(self):
    """Value returned by `fz_pixmap_samples_int()` for this pixmap."""
    pointer = mupdf.fz_pixmap_samples_int(self.this)
    return pointer
def save(self, filename, output=None, jpg_quality=95):
    """Save the pixmap as an image file.

    The format is taken from `output` or, if that is None, from the
    extension of the file name.

    Args:
        filename: a str; an object with an "absolute" attribute is
            converted with str(); from an object with a "name" attribute
            that name is used.
        output: (str) only use to overrule filename extension.
            Supported are PNG, JPEG, JPG, PNM, PGM, PPM, PBM, PAM, PSD, PS.
        jpg_quality: passed on to the image writer.
    Raises:
        ValueError: for an unknown format, for alpha with PNM/PS/JPEG or
            for a colorspace with more than 3 components with PNG/PNM.
    """
    valid_formats = {
        "png": 1,
        "pnm": 2,
        "pgm": 2,
        "ppm": 2,
        "pbm": 2,
        "pam": 3,
        "psd": 5,
        "ps": 6,
        "jpg": 7,
        "jpeg": 7,
    }

    if type(filename) is not str:
        if hasattr(filename, "absolute"):
            filename = str(filename)
        elif hasattr(filename, "name"):
            filename = filename.name

    if output is None:
        extension = os.path.splitext(filename)[1]
        output = extension[1:]

    idx = valid_formats.get(output.lower())
    if idx is None:
        raise ValueError(f"Image format {output} not in {tuple(valid_formats.keys())}")
    if self.alpha and idx in (2, 6, 7):
        raise ValueError("'%s' cannot have alpha" % output)
    if self.colorspace and self.colorspace.n > 3 and idx in (1, 2, 4):
        raise ValueError(f"unsupported colorspace for '{output}'")
    if idx == 7:
        # For JPEG the current resolution is re-applied before writing.
        self.set_dpi(self.xres, self.yres)
    return self._writeIMG(filename, idx, jpg_quality)
def set_alpha(self, alphavalues=None, premultiply=1, opaque=None, matte=None):
    """Set alpha channel to values contained in a byte array.

    If omitted, set alphas to 255.

    Args:
        alphavalues: (bytes) with length (width * height) or 'None'.
        premultiply: (bool, True) premultiply colors with alpha values.
        opaque: (tuple, length colorspace.n) this color receives opacity 0.
        matte: (tuple, length colorspace.n)) preblending background color.
    Raises:
        ValueError: if the pixmap has no alpha channel or `alphavalues`
            holds fewer than width * height items.
    """
    pix = self.this
    if pix.alpha() == 0:
        raise ValueError( MSG_PIX_NOALPHA)

    n = mupdf.fz_pixmap_colorants(pix)
    w = mupdf.fz_pixmap_width(pix)
    h = mupdf.fz_pixmap_height(pix)
    balen = w * h * (n+1)

    # `opaque` / `matte` are only honored if they are lists or tuples with
    # exactly one entry per colorant.
    colors = [0, 0, 0, 0]   # make this color opaque
    zero_out = 0
    if opaque and isinstance(opaque, (list, tuple)) and len(opaque) == n:
        for i, component in enumerate(opaque):
            colors[i] = component
        zero_out = 1

    bgcolor = [0, 0, 0, 0]  # preblending background color
    bground = 0
    if matte and isinstance( matte, (tuple, list)) and len(matte) == n:
        for i, component in enumerate(matte):
            bgcolor[i] = component
        bground = 1

    data = bytes()
    data_len = 0
    if alphavalues:
        if isinstance(alphavalues, (bytes, bytearray)):
            data = alphavalues
            data_len = len(alphavalues)
        else:
            assert 0, f'unexpected type for alphavalues: {type(alphavalues)}'
        if data_len < w * h:
            raise ValueError( "bad alpha values")

    # Use C implementation for speed.
    mupdf.Pixmap_set_alpha_helper(
            balen,
            n,
            data_len,
            zero_out,
            mupdf.python_buffer_data( data),
            pix.m_internal,
            premultiply,
            bground,
            colors,
            bgcolor,
            )
def tobytes(self, output="png", jpg_quality=95):
    '''
    Convert to binary image stream of desired type.

    Args:
        output: (str) image format, case-insensitive. One of png, pnm,
            pgm, ppm, pbm, pam, tga, tpic, psd, ps, jpg, jpeg.
        jpg_quality: passed on to `_tobytes()`.
    Returns:
        The result of `_tobytes()` for the selected format.
    Raises:
        ValueError: for an unknown format, for alpha with PNM/PS/JPEG or
            for a colorspace with more than 3 components with
            PNG/PNM/TGA.
    '''
    valid_formats = {
        "png": 1,
        "pnm": 2,
        "pgm": 2,
        "ppm": 2,
        "pbm": 2,
        "pam": 3,
        "tga": 4,
        "tpic": 4,
        "psd": 5,
        "ps": 6,
        'jpg': 7,
        'jpeg': 7,
    }
    idx = valid_formats.get(output.lower(), None)
    if idx is None:
        raise ValueError(f"Image format {output} not in {tuple(valid_formats.keys())}")
    if self.alpha and idx in (2, 6, 7):
        # f-string, so the message names the requested format.
        raise ValueError(f"'{output}' cannot have alpha")
    if self.colorspace and self.colorspace.n > 3 and idx in (1, 2, 4):
        raise ValueError(f"unsupported colorspace for '{output}'")
    if idx == 7:
        # For JPEG the current resolution is re-applied before encoding.
        self.set_dpi(self.xres, self.yres)
    return self._tobytes(idx, jpg_quality)
def set_dpi(self, xres, yres):
    """Store `xres` and `yres` as the resolution of the pixmap."""
    raw = self.this.m_internal
    raw.xres = xres
    raw.yres = yres
def set_origin(self, x, y):
    """Store (x, y) as the top-left coordinates of the pixmap."""
    raw = self.this.m_internal
    raw.x = x
    raw.y = y
def set_pixel(self, x, y, color):
    """Set the samples of pixel (x, y) to the items of `color`.

    Raises (pure Python path):
        ValueError: if (x, y) is outside the pixmap or one of the first
            `n` items of `color` is not in range 0..255.
    """
    if g_use_extra:
        return extra.set_pixel(self.this.m_internal, x, y, color)
    pixmap = self.this
    inside = _INRANGE(x, 0, pixmap.w() - 1) and _INRANGE(y, 0, pixmap.h() - 1)
    if not inside:
        raise ValueError( MSG_PIXEL_OUTSIDE)
    n = pixmap.n()
    # Validate all components before writing any of them.
    for component in range(n):
        if not _INRANGE(color[component], 0, 255):
            raise ValueError( MSG_BAD_COLOR_SEQ)
    stride = mupdf.fz_pixmap_stride( pixmap)
    start = stride * y + n * x
    for component in range(n):
        pixmap.fz_samples_set(start + component, color[component])
def set_rect(self, bbox, color):
    """Set all pixels inside `bbox` to `color`.

    Returns:
        bool of the result of `JM_fill_pixmap_rect_with_color()`.
    Raises:
        ValueError: if one of the first `n` items of `color` is not in
            range 0..255.
    """
    pixmap = self.this
    components = []
    for index in range(pixmap.n()):
        value = color[index]
        if not _INRANGE(value, 0, 255):
            raise ValueError( MSG_BAD_COLOR_SEQ)
        components.append(value)
    area = JM_irect_from_py(bbox)
    return bool(JM_fill_pixmap_rect_with_color(pixmap, components, area))
def shrink(self, factor):
    """Subsample the pixmap in place: width and height are divided by
    2**factor, e.g. factor=1 shrinks to 25% of the original size.

    A factor below 1 is ignored with a warning.
    """
    if factor < 1:
        message_warning("ignoring shrink factor < 1")
        return
    mupdf.fz_subsample_pixmap( self.this, factor)
    # The samples have changed, so drop what we cached about them.
    self._memory_view = None
    self._samples_mv_release()
@property
def size(self):
    """Pixmap size as reported by `fz_pixmap_size()`."""
    result = mupdf.fz_pixmap_size( self.this)
    return result
@property
def stride(self):
    """Length of one image line (width * n)."""
    pixmap = self.this
    return pixmap.stride()
def tint_with(self, black, white):
    """Tint the pixmap with the modifiers `black` and `white`.

    Does nothing but issue a message if there is no colorspace or it
    has more than 3 components.
    """
    colorspace = self.colorspace
    if not colorspace or colorspace.n > 3:
        message("warning: colorspace invalid for function")
        return
    return mupdf.fz_tint_pixmap( self.this, black, white)
@property
def w(self):
    """Pixmap width as reported by `fz_pixmap_width()`."""
    width = mupdf.fz_pixmap_width(self.this)
    return width
def warp(self, quad, width, height):
    """Return a new Pixmap of size width x height made from the area
    `quad` of this pixmap.

    Raises:
        ValueError: if `quad` is not convex.
    """
    if not quad.is_convex:
        raise ValueError("quad must be convex")
    fz_quad = JM_quad_from_py(quad)
    # Corner order handed to MuPDF: ul, ur, lr, ll.
    corners = [ fz_quad.ul, fz_quad.ur, fz_quad.lr, fz_quad.ll]
    warped = mupdf.fz_warp_pixmap( self.this, corners, width, height)
    return Pixmap( warped)
@property
def x(self):
    """x coordinate of the pixmap origin (`fz_pixmap_x()`)."""
    origin_x = mupdf.fz_pixmap_x(self.this)
    return origin_x
@property
def xres(self):
    """Resolution in x direction."""
    pixmap = self.this
    return pixmap.xres()
@property
def y(self):
    """y coordinate of the pixmap origin (`fz_pixmap_y()`)."""
    origin_y = mupdf.fz_pixmap_y(self.this)
    return origin_y
@property
def yres(self):
    """Resolution in y direction."""
    pixmap = self.this
    return pixmap.yres()
# Long-form aliases: `width` and `height` are bound to the same property
# objects as `w` and `h`.
width  = w
height = h
def __del__(self):
    """Release the cached samples memoryview when the object goes away.

    The attribute is fetched with a default because it may not exist on
    a partially constructed object, and it is compared with None rather
    than tested for truth because the truth value of an already released
    memoryview raises ValueError. Calling `release()` on a released view
    is harmless.
    """
    view = getattr(self, "_samples_mv", None)
    if view is not None:
        view.release()
# Remove the binding of the name `Point` that exists at this point so that the
# class statement below creates it afresh.
# NOTE(review): the earlier binding lies outside this excerpt - confirm what it is.
del Point
class Point:
    """A point (or vector) in the plane with coordinates `x` and `y`.

    Instances behave like a sequence of length 2 and support arithmetic
    with numbers, point-like sequences and matrix-like sequences.
    """

    def __abs__(self):
        """Euclidean length of the vector (x, y)."""
        return math.sqrt(self.x * self.x + self.y * self.y)

    def __add__(self, p):
        """Add a number to both coordinates, or a point-like pairwise."""
        if hasattr(p, "__float__"):
            return Point(self.x + p, self.y + p)
        if len(p) != 2:
            raise ValueError("Point: bad seq len")
        return Point(self.x + p[0], self.y + p[1])

    def __bool__(self):
        """False only if both coordinates are zero."""
        return not (max(self) == min(self) == 0)

    def __eq__(self, p):
        """Equal to any sequence of length 2 with the same coordinates."""
        if not hasattr(p, "__len__"):
            return False
        return len(p) == 2 and not (self - p)

    def __getitem__(self, i):
        return (self.x, self.y)[i]

    def __hash__(self):
        return hash(tuple(self))

    def __init__(self, *args, x=None, y=None):
        '''
        Point() - all zeros
        Point(x, y)
        Point(Point) - new copy
        Point(sequence) - from 'sequence'

        Explicit keyword args x, y override earlier settings if not None.
        They are converted to float like positional coordinates.

        Raises ValueError for more than two positional arguments, or for
        a single argument that is not a sequence of length 2.
        '''
        if not args:
            self.x = 0.0
            self.y = 0.0
        elif len(args) > 2:
            raise ValueError("Point: bad seq len")
        elif len(args) == 2:
            self.x = float(args[0])
            self.y = float(args[1])
        else:
            # Exactly one argument: a MuPDF point or a sequence.
            l = args[0]
            if isinstance(l, (mupdf.FzPoint, mupdf.fz_point)):
                self.x = l.x
                self.y = l.y
            else:
                if not hasattr(l, "__getitem__"):
                    raise ValueError("Point: bad args")
                if len(l) != 2:
                    raise ValueError("Point: bad seq len")
                self.x = float(l[0])
                self.y = float(l[1])
        # Keyword overrides get the same float conversion as the
        # positional path and __setitem__.
        if x is not None:
            self.x = float(x)
        if y is not None:
            self.y = float(y)

    def __len__(self):
        return 2

    def __mul__(self, m):
        """Multiply with a number (scaling), a sequence of length 2
        (dot product, returns a number) or a matrix-like (transformation).
        """
        if hasattr(m, "__float__"):
            return Point(self.x * m, self.y * m)
        if hasattr(m, "__getitem__") and len(m) == 2:
            # dot product
            return self.x * m[0] + self.y * m[1]
        p = Point(self)
        return p.transform(m)

    def __neg__(self):
        return Point(-self.x, -self.y)

    def __nonzero__(self):
        # Python 2 spelling of __bool__.
        return not (max(self) == min(self) == 0)

    def __pos__(self):
        """Return a copy."""
        return Point(self)

    def __repr__(self):
        return "Point" + str(tuple(self))

    def __setitem__(self, i, v):
        """Set coordinate 0 (x) or 1 (y) to float(v)."""
        v = float(v)
        if i == 0:
            self.x = v
        elif i == 1:
            self.y = v
        else:
            raise IndexError("index out of range")
        return None

    def __sub__(self, p):
        """Subtract a number from both coordinates, or a point-like pairwise."""
        if hasattr(p, "__float__"):
            return Point(self.x - p, self.y - p)
        if len(p) != 2:
            raise ValueError("Point: bad seq len")
        return Point(self.x - p[0], self.y - p[1])

    def __truediv__(self, m):
        """Divide by a number, or transform with the inverse of matrix-like m.

        Raises ZeroDivisionError if no inverse matrix is available.
        """
        if hasattr(m, "__float__"):
            return Point(self.x * 1./m, self.y * 1./m)
        m1 = util_invert_matrix(m)[1]
        if not m1:
            raise ZeroDivisionError("matrix not invertible")
        p = Point(self)
        return p.transform(m1)

    @property
    def abs_unit(self):
        """Unit vector with positive coordinates."""
        s = self.x * self.x + self.y * self.y
        if s < EPSILON:
            return Point(0, 0)
        s = math.sqrt(s)
        return Point(abs(self.x) / s, abs(self.y) / s)

    def distance_to(self, *args):
        """Return distance to rectangle or another point.

        Args:
            args[0]: point-like (length 2) or rect-like (length 4).
            args[1]: optional unit, one of "px" (default), "in", "cm", "mm".
        """
        if not len(args) > 0:
            raise ValueError("at least one parameter must be given")

        x = args[0]
        if len(x) == 2:
            x = Point(x)
        elif len(x) == 4:
            x = Rect(x)
        else:
            raise ValueError("arg1 must be point-like or rect-like")

        if len(args) > 1:
            unit = args[1]
        else:
            unit = "px"
        u = {"px": (1., 1.), "in": (1., 72.), "cm": (2.54, 72.),
             "mm": (25.4, 72.)}
        f = u[unit][0] / u[unit][1]

        if type(x) is Point:
            return abs(self - x) * f

        # from here on, x is a rectangle
        # as a safeguard, make a finite copy of it
        r = Rect(x.top_left, x.top_left)
        r = r | x.bottom_right
        if self in r:
            return 0.0
        if self.x > r.x1:
            # to the right of the rectangle
            if self.y >= r.y1:
                return self.distance_to(r.bottom_right, unit)
            elif self.y <= r.y0:
                return self.distance_to(r.top_right, unit)
            else:
                return (self.x - r.x1) * f
        elif r.x0 <= self.x <= r.x1:
            # above or below the rectangle
            if self.y >= r.y1:
                return (self.y - r.y1) * f
            else:
                return (r.y0 - self.y) * f
        else:
            # to the left of the rectangle
            if self.y >= r.y1:
                return self.distance_to(r.bottom_left, unit)
            elif self.y <= r.y0:
                return self.distance_to(r.top_left, unit)
            else:
                return (r.x0 - self.x) * f

    def transform(self, m):
        """Replace point by its transformation with matrix-like m."""
        if len(m) != 6:
            raise ValueError("Matrix: bad seq len")
        self.x, self.y = util_transform_point(self, m)
        return self

    @property
    def unit(self):
        """Unit vector of the point."""
        s = self.x * self.x + self.y * self.y
        if s < EPSILON:
            return Point(0, 0)
        s = math.sqrt(s)
        return Point(self.x / s, self.y / s)

    __div__ = __truediv__
    norm = __abs__
class Quad:
    """A quadrilateral given by its four corner points `ul`, `ur`, `ll`
    and `lr`.

    Instances behave like a sequence of length 4 in the order
    (ul, ur, ll, lr).
    """

    def __abs__(self):
        """Product of the lengths of sides ul-ur and ul-ll; 0.0 if empty."""
        if self.is_empty:
            return 0.0
        return abs(self.ul - self.ur) * abs(self.ul - self.ll)

    def __add__(self, q):
        """Add a number to all corners, or a sequence of 4 items cornerwise."""
        if hasattr(q, "__float__"):
            return Quad(self.ul + q, self.ur + q, self.ll + q, self.lr + q)
        if len(q) != 4:
            raise ValueError("Quad: bad seq len")
        return Quad(self.ul + q[0], self.ur + q[1], self.ll + q[2], self.lr + q[3])

    def __bool__(self):
        return not self.is_empty

    def __contains__(self, x):
        """Check whether a point-like, rect-like or quad-like is inside."""
        try:
            l = x.__len__()
        except Exception:
            if g_exceptions_verbose > 1:    exception_info()
            return False
        if l == 2:
            return util_point_in_quad(x, self)
        if l != 4:
            return False
        if CheckRect(x):
            if Rect(x).is_empty:
                return True
            return util_point_in_quad(x[:2], self) and util_point_in_quad(x[2:], self)
        if CheckQuad(x):
            for i in range(4):
                if not util_point_in_quad(x[i], self):
                    return False
            return True
        return False

    def __eq__(self, quad):
        """Equal to any sequence of length 4 with equal corners."""
        if not hasattr(quad, "__len__"):
            return False
        return len(quad) == 4 and (
            self.ul == quad[0] and
            self.ur == quad[1] and
            self.ll == quad[2] and
            self.lr == quad[3]
        )

    def __getitem__(self, i):
        return (self.ul, self.ur, self.ll, self.lr)[i]

    def __hash__(self):
        return hash(tuple(self))

    def __init__(self, *args, ul=None, ur=None, ll=None, lr=None):
        '''
        Quad() - all zero points
        Quad(ul, ur, ll, lr)
        Quad(quad) - new copy
        Quad(sequence) - from 'sequence'

        Explicit keyword args ul, ur, ll, lr override earlier settings if not
        None.
        '''
        if not args:
            # One fresh Point per corner: Point objects are mutable, so a
            # shared instance would make a change to one corner show up
            # in all four.
            self.ul, self.ur, self.ll, self.lr = Point(), Point(), Point(), Point()
        elif len(args) > 4:
            raise ValueError("Quad: bad seq len")
        elif len(args) == 4:
            self.ul, self.ur, self.ll, self.lr = map(Point, args)
        elif len(args) == 1:
            l = args[0]
            if isinstance(l, mupdf.FzQuad):
                self.this = l
                self.ul, self.ur, self.ll, self.lr = Point(l.ul), Point(l.ur), Point(l.ll), Point(l.lr)
            elif not hasattr(l, "__getitem__"):
                raise ValueError("Quad: bad args")
            elif len(l) != 4:
                raise ValueError("Quad: bad seq len")
            else:
                self.ul, self.ur, self.ll, self.lr = map(Point, l)
        else:
            # two or three positional arguments
            raise ValueError("Quad: bad args")
        if ul is not None:  self.ul = Point(ul)
        if ur is not None:  self.ur = Point(ur)
        if ll is not None:  self.ll = Point(ll)
        if lr is not None:  self.lr = Point(lr)

    def __len__(self):
        return 4

    def __mul__(self, m):
        """Return a copy transformed with `m` (see `transform()`)."""
        q = Quad(self)
        q = q.transform(m)
        return q

    def __neg__(self):
        return Quad(-self.ul, -self.ur, -self.ll, -self.lr)

    def __nonzero__(self):
        # Python 2 spelling of __bool__.
        return not self.is_empty

    def __pos__(self):
        """Return a copy."""
        return Quad(self)

    def __repr__(self):
        return "Quad" + str(tuple(self))

    def __setitem__(self, i, v):
        """Set corner 0 (ul), 1 (ur), 2 (ll) or 3 (lr) to Point(v)."""
        if   i == 0: self.ul = Point(v)
        elif i == 1: self.ur = Point(v)
        elif i == 2: self.ll = Point(v)
        elif i == 3: self.lr = Point(v)
        else:
            raise IndexError("index out of range")
        return None

    def __sub__(self, q):
        """Subtract a number from all corners, or a sequence of 4 items cornerwise."""
        if hasattr(q, "__float__"):
            return Quad(self.ul - q, self.ur - q, self.ll - q, self.lr - q)
        if len(q) != 4:
            raise ValueError("Quad: bad seq len")
        return Quad(self.ul - q[0], self.ur - q[1], self.ll - q[2], self.lr - q[3])

    def __truediv__(self, m):
        """Return a copy divided by a number, or transformed with the
        inverse of matrix-like m.

        Raises ZeroDivisionError if no inverse matrix is available.
        """
        if hasattr(m, "__float__"):
            im = 1. / m
        else:
            im = util_invert_matrix(m)[1]
            if not im:
                raise ZeroDivisionError("Matrix not invertible")
        q = Quad(self)
        q = q.transform(im)
        return q

    @property
    def is_convex(self):
        """Check if quad is convex and not degenerate.

        Notes:
            Check that for the two diagonals, the other two corners are not
            on the same side of the diagonal.
        Returns:
            True or False.
        """
        m = planish_line(self.ul, self.lr)  # puts this diagonal on x-axis
        p1 = self.ll * m  # transform the
        p2 = self.ur * m  # other two points
        if p1.y * p2.y > 0:
            return False
        m = planish_line(self.ll, self.ur)  # puts other diagonal on x-axis
        p1 = self.lr * m  # transform the
        p2 = self.ul * m  # remaining points
        if p1.y * p2.y > 0:
            return False
        return True

    @property
    def is_empty(self):
        """Check whether all quad corners are on the same line.

        This is the case if width or height is zero.
        """
        return self.width < EPSILON or self.height < EPSILON

    @property
    def is_infinite(self):
        """Check whether this is the infinite quad."""
        return self.rect.is_infinite

    @property
    def is_rectangular(self):
        """Check if quad is rectangular.

        Notes:
            Some rotation matrix can thus transform it into a rectangle.
            This is equivalent to three corners enclose 90 degrees.
        Returns:
            True or False.
        """
        sine = util_sine_between(self.ul, self.ur, self.lr)
        if abs(sine - 1) > EPSILON:  # the sine of the angle
            return False
        sine = util_sine_between(self.ur, self.lr, self.ll)
        if abs(sine - 1) > EPSILON:
            return False
        sine = util_sine_between(self.lr, self.ll, self.ul)
        if abs(sine - 1) > EPSILON:
            return False
        return True

    def morph(self, p, m):
        """Morph the quad with matrix-like 'm' and point-like 'p'.

        Return a new quad."""
        if self.is_infinite:
            return INFINITE_QUAD()
        # Accept any point-like, not only objects with .x / .y attributes.
        p = Point(p)
        delta = Matrix(1, 1).pretranslate(p.x, p.y)
        q = self * ~delta * m * delta
        return q

    @property
    def rect(self):
        """Smallest Rect containing all four corners."""
        r = Rect()
        r.x0 = min(self.ul.x, self.ur.x, self.lr.x, self.ll.x)
        r.y0 = min(self.ul.y, self.ur.y, self.lr.y, self.ll.y)
        r.x1 = max(self.ul.x, self.ur.x, self.lr.x, self.ll.x)
        r.y1 = max(self.ul.y, self.ur.y, self.lr.y, self.ll.y)
        return r

    def transform(self, m):
        """Replace quad by its transformation with matrix m.

        A number is accepted too and multiplies every corner.
        """
        if hasattr(m, "__float__"):
            pass
        elif len(m) != 6:
            raise ValueError("Matrix: bad seq len")
        self.ul *= m
        self.ur *= m
        self.ll *= m
        self.lr *= m
        return self

    __div__ = __truediv__
    # Longer one of the two opposite sides in each direction.
    width  = property(lambda self: max(abs(self.ul - self.ur), abs(self.ll - self.lr)))
    height = property(lambda self: max(abs(self.ul - self.ll), abs(self.ur - self.lr)))
class Rect:
    """Rectangle with float coordinates x0, y0 (top-left corner) and
    x1, y1 (bottom-right corner).

    Instances behave like a sequence of length 4 in the order
    (x0, y0, x1, y1).
    """

    def __abs__(self):
        """Area of the rectangle; 0.0 if it is empty or infinite."""
        degenerate = self.is_empty or self.is_infinite
        return 0.0 if degenerate else (self.x1 - self.x0) * (self.y1 - self.y0)

    def __add__(self, p):
        """Add a number to all coordinates, or a sequence of 4 items pairwise."""
        if hasattr(p, "__float__"):
            p = (p, p, p, p)
        elif len(p) != 4:
            raise ValueError("Rect: bad seq len")
        return Rect(self.x0 + p[0], self.y0 + p[1], self.x1 + p[2], self.y1 + p[3])

    def __and__(self, x):
        """Return a new rectangle: the intersection with rect-like x."""
        if not hasattr(x, "__len__"):
            raise ValueError("bad operand 2")
        other = Rect(x)
        return Rect(self).intersect(other)

    def __bool__(self):
        """False only if all four coordinates are zero."""
        lowest, highest = min(self), max(self)
        return not (highest == lowest == 0)

    def __contains__(self, x):
        """Check for a number among the coordinates, or for a point-like,
        rect-like or quad-like inside the rectangle."""
        if hasattr(x, "__float__"):
            return x in tuple(self)
        size = len(x)
        if size == 2:
            return util_is_point_in_rect(x, self)
        if size != 4:
            return False
        try:
            inner = Rect(x)
        except Exception:
            if g_exceptions_verbose > 1:    exception_info()
            # Not rect-like: treat it as a quad and use its enclosing rect.
            inner = Quad(x).rect
        return (self.x0 <= inner.x0 <= inner.x1 <= self.x1 and
                self.y0 <= inner.y0 <= inner.y1 <= self.y1)

    def __eq__(self, rect):
        """Equal to any sequence of length 4 with the same coordinates."""
        if not hasattr(rect, "__len__") or len(rect) != 4:
            return False
        return not (self - rect)

    def __getitem__(self, i):
        coordinates = (self.x0, self.y0, self.x1, self.y1)
        return coordinates[i]

    def __hash__(self):
        return hash(tuple(self))

    def __init__(self, *args, p0=None, p1=None, x0=None, y0=None, x1=None, y1=None):
        """
        Rect() - all zeros
        Rect(x0, y0, x1, y1)
        Rect(top-left, x1, y1)
        Rect(x0, y0, bottom-right)
        Rect(top-left, bottom-right)
        Rect(Rect or IRect) - new copy
        Rect(sequence) - from 'sequence'

        Explicit keyword args p0, p1, x0, y0, x1, y1 override earlier settings
        if not None.
        """
        left, top, right, bottom = util_make_rect(
            *args, p0=p0, p1=p1, x0=x0, y0=y0, x1=x1, y1=y1
        )
        self.x0 = float(left)
        self.y0 = float(top)
        self.x1 = float(right)
        self.y1 = float(bottom)

    def __len__(self):
        return 4

    def __mul__(self, m):
        """Multiply all coordinates with a number, or return a copy
        transformed with matrix-like m."""
        if hasattr(m, "__float__"):
            return Rect(self.x0 * m, self.y0 * m, self.x1 * m, self.y1 * m)
        return Rect(self).transform(m)

    def __neg__(self):
        return Rect(-self.x0, -self.y0, -self.x1, -self.y1)

    # Python 2 spelling of __bool__.
    __nonzero__ = __bool__

    def __or__(self, x):
        """Return a new rectangle that also includes point-like or rect-like x."""
        if not hasattr(x, "__len__"):
            raise ValueError("bad operand 2")
        result = Rect(self)
        size = len(x)
        if size == 2:
            return result.include_point(x)
        if size == 4:
            return result.include_rect(x)
        raise ValueError("bad operand 2")

    def __pos__(self):
        """Return a copy."""
        return Rect(self)

    def __repr__(self):
        return "Rect" + str(tuple(self))

    def __setitem__(self, i, v):
        """Set coordinate 0 (x0), 1 (y0), 2 (x1) or 3 (y1) to float(v)."""
        value = float(v)
        if i == 0:
            self.x0 = value
        elif i == 1:
            self.y0 = value
        elif i == 2:
            self.x1 = value
        elif i == 3:
            self.y1 = value
        else:
            raise IndexError("index out of range")

    def __sub__(self, p):
        """Subtract a number from all coordinates, or a sequence of 4 items pairwise."""
        if hasattr(p, "__float__"):
            p = (p, p, p, p)
        elif len(p) != 4:
            raise ValueError("Rect: bad seq len")
        return Rect(self.x0 - p[0], self.y0 - p[1], self.x1 - p[2], self.y1 - p[3])

    def __truediv__(self, m):
        """Divide all coordinates by a number, or return a copy transformed
        with the inverse of matrix-like m.

        Raises ZeroDivisionError if no inverse matrix is available.
        """
        if hasattr(m, "__float__"):
            return Rect(self.x0 * 1./m, self.y0 * 1./m, self.x1 * 1./m, self.y1 * 1./m)
        inverse = util_invert_matrix(m)[1]
        if not inverse:
            raise ZeroDivisionError(f"Matrix not invertible: {m}")
        return Rect(self).transform(inverse)

    @property
    def bottom_left(self):
        """Bottom-left corner."""
        return Point(self.x0, self.y1)

    @property
    def bottom_right(self):
        """Bottom-right corner."""
        return Point(self.x1, self.y1)

    def contains(self, x):
        """Check if containing point-like or rect-like x."""
        return self.__contains__(x)

    @property
    def height(self):
        """y1 - y0, but never less than 0."""
        return max(0, self.y1 - self.y0)

    def include_point(self, p):
        """Extend to include point-like p."""
        if len(p) != 2:
            raise ValueError("Point: bad seq len")
        self.x0, self.y0, self.x1, self.y1 = util_include_point_in_rect(self, p)
        return self

    def include_rect(self, r):
        """Extend to include rect-like r."""
        if len(r) != 4:
            raise ValueError("Rect: bad seq len")
        other = Rect(r)
        if other.is_infinite or self.is_infinite:
            # The union with an infinite rectangle is infinite.
            new_coords = (FZ_MIN_INF_RECT, FZ_MIN_INF_RECT, FZ_MAX_INF_RECT, FZ_MAX_INF_RECT)
        elif other.is_empty:
            return self
        elif self.is_empty:
            new_coords = (other.x0, other.y0, other.x1, other.y1)
        else:
            new_coords = util_union_rect(self, other)
        self.x0, self.y0, self.x1, self.y1 = new_coords
        return self

    def intersect(self, r):
        """Restrict to common rect with rect-like r."""
        if len(r) != 4:
            raise ValueError("Rect: bad seq len")
        other = Rect(r)
        if other.is_infinite:
            return self
        if self.is_infinite or other.is_empty:
            # In both cases the result is simply the other rectangle.
            new_coords = (other.x0, other.y0, other.x1, other.y1)
        elif self.is_empty:
            return self
        else:
            new_coords = util_intersect_rect(self, other)
        self.x0, self.y0, self.x1, self.y1 = new_coords
        return self

    def intersects(self, x):
        """Check if intersection with rectangle x is not empty."""
        other = Rect(x)
        if self.is_empty or self.is_infinite or other.is_empty or other.is_infinite:
            return False
        return (
            self.x0 < other.x1
            and other.x0 < self.x1
            and self.y0 < other.y1
            and other.y0 < self.y1
        )

    @property
    def is_empty(self):
        """True if rectangle area is empty."""
        return self.x0 >= self.x1 or self.y0 >= self.y1

    @property
    def is_infinite(self):
        """True if this is the infinite rectangle."""
        starts_at_min = self.x0 == self.y0 == FZ_MIN_INF_RECT
        return starts_at_min and self.x1 == self.y1 == FZ_MAX_INF_RECT

    @property
    def is_valid(self):
        """True if rectangle is valid."""
        return self.x0 <= self.x1 and self.y0 <= self.y1

    def morph(self, p, m):
        """Morph with matrix-like m and point-like p.

        Returns a new quad."""
        if self.is_infinite:
            return INFINITE_QUAD()
        return self.quad.morph(p, m)

    def norm(self):
        """Square root of the sum of the squared coordinates."""
        return math.sqrt(sum(c * c for c in self))

    def normalize(self):
        """Swap coordinates where needed so that x0 <= x1 and y0 <= y1."""
        if self.x1 < self.x0:
            self.x0, self.x1 = self.x1, self.x0
        if self.y1 < self.y0:
            self.y0, self.y1 = self.y1, self.y0
        return self

    @property
    def quad(self):
        """Return Quad version of rectangle."""
        return Quad(self.tl, self.tr, self.bl, self.br)

    def round(self):
        """Return the IRect."""
        return IRect(util_round_rect(self))

    @property
    def top_left(self):
        """Top-left corner."""
        return Point(self.x0, self.y0)

    @property
    def top_right(self):
        """Top-right corner."""
        return Point(self.x1, self.y0)

    def torect(self, r):
        """Return matrix that converts to target rect."""
        target = Rect(r)
        if self.is_infinite or self.is_empty or target.is_infinite or target.is_empty:
            raise ValueError("rectangles must be finite and not empty")
        # Move our top-left to the origin, scale to the target size, then
        # move to the target's top-left.
        to_origin = Matrix(1, 0, 0, 1, -self.x0, -self.y0)
        scale = Matrix(target.width / self.width, target.height / self.height)
        to_target = Matrix(1, 0, 0, 1, target.x0, target.y0)
        return to_origin * scale * to_target

    def transform(self, m):
        """Replace with the transformation by matrix-like m."""
        if len(m) != 6:
            raise ValueError("Matrix: bad seq len")
        self.x0, self.y0, self.x1, self.y1 = util_transform_rect(self, m)
        return self

    @property
    def width(self):
        """x1 - x0, but never less than 0."""
        return max(0, self.x1 - self.x0)

    __div__ = __truediv__

    # Short aliases.
    bl = bottom_left
    br = bottom_right
    irect = property(round)
    tl = top_left
    tr = top_right
+class Story:
def __init__( self, html='', user_css=None, em=12, archive=None):
    """Create the underlying MuPDF story object.

    Args:
        html: (str) HTML source, handed to MuPDF UTF-8 encoded.
        user_css: passed on to MuPDF unchanged.
        em: passed on to MuPDF unchanged.
        archive: an `Archive`, or something `Archive()` accepts; if it is
            false, an `mupdf.FzArchive(None)` is used.
    """
    html_buffer = mupdf.fz_new_buffer_from_copied_data( html.encode('utf-8'))
    if archive and not isinstance(archive, Archive):
        archive = Archive(archive)
    fz_archive = archive.this if archive else mupdf.FzArchive( None)
    # Use `FzStoryS` when the bindings provide it, else `FzStory`.
    story_class = mupdf.FzStoryS if hasattr(mupdf, 'FzStoryS') else mupdf.FzStory
    self.this = story_class( html_buffer, user_css, em, fz_archive)
def add_header_ids(self):
    '''
    Give every `<h1>` .. `<h6>` element of the story body an `id`
    attribute unless it already has one.

    New ids have the form `h_id_<number>`, numbered from 0 in the order
    in which they are assigned.
    '''
    next_number = 0
    node = self.body.find(None, None, None)
    while node:
        tag = node.tagname
        is_header = len(tag) == 2 and tag[0]=="h" and tag[1] in "123456"
        if is_header and not node.get_attribute_value("id"):
            node.set_attribute("id", f"h_id_{next_number}")
            next_number += 1
        node = node.find_next(None, None, None)
@staticmethod
def add_pdf_links(document_or_stream, positions):
    """
    Adds links to PDF document.

    Args:
        document_or_stream:
            A PDF `Document` or raw PDF content, for example an
            `io.BytesIO` instance.
        positions:
            List of `ElementPosition`'s for `document_or_stream`,
            typically from Story.element_positions(). Only entries whose
            `open_close` has bit 0 set are used. If several such entries
            share the same `id`, the first one is the link destination
            and the later ones are ignored.

    Returns:
        `document_or_stream` if a `Document` instance, otherwise a
        new `Document` instance.

    Raises:
        RuntimeError: if an `href` in `positions` refers to an internal
            position `#<name>` but no item in `positions` has
            `id = name`.
    """
    if isinstance(document_or_stream, Document):
        document = document_or_stream
    else:
        document = Document("pdf", document_or_stream)

    # Map each id to the first position that carries it; used below to
    # resolve internal link destinations.
    id_to_position = {}
    for position in positions:
        if (position.open_close & 1) and position.id:
            id_to_position.setdefault(position.id, position)

    # Insert a link for every position that has an `href`.
    for position_from in positions:
        if not ((position_from.open_close & 1) and position_from.href):
            continue
        href = position_from.href
        link = {'from': Rect(position_from.rect)}
        if href.startswith("#"):
            # `<a href="#...">...</a>` internal link.
            target_id = href[1:]
            try:
                position_to = id_to_position[target_id]
            except KeyError as e:
                if g_exceptions_verbose > 1:
                    exception_info()
                raise RuntimeError(f"No destination with id={target_id}, required by position_from: {position_from}") from e
            # Link from `position_from`'s rect to the top-left of
            # `position_to`'s rect. This appears to work well with
            # viewers which scroll to make the destination point the
            # top-left of the window.
            link["kind"] = LINK_GOTO
            x0, y0, _x1, _y1 = position_to.rect
            link["to"] = Point(x0, y0)
            link["page"] = position_to.page_num - 1
        elif href.startswith('name:'):
            # `<a href="name:...">...</a>` named link.
            link['kind'] = LINK_NAMED
            link['name'] = href[5:]
        else:
            # `<a href="...">...</a>` external link.
            link['kind'] = LINK_URI
            link['uri'] = href
        document[position_from.page_num - 1].insert_link(link)
    return document
@property
def body(self):
    '''The result of `bodytag()` on the story's DOM (see `document()`).'''
    return self.document().bodytag()
def document(self):
    '''Return the story's DOM, as given by `mupdf.fz_story_document()`, wrapped in `Xml`.'''
    return Xml(mupdf.fz_story_document(self.this))
def draw(self, device, matrix=None):
    '''
    Draw the story via `mupdf.fz_draw_story()`.

    Args:
        device: object whose `.this` is the target device; a false value
            selects `mupdf.FzDevice(None)`.
        matrix: converted with `JM_matrix_from_py()` and passed on as the
            transformation.
    '''
    ctm = JM_matrix_from_py(matrix)
    if device:
        target = device.this
    else:
        target = mupdf.FzDevice(None)
    mupdf.fz_draw_story(self.this, target, ctm)
def element_positions(self, function, args=None):
    '''
    Trigger a callback function to record where items have been placed.

    Args:
        function: callable whose `__code__.co_argcount` is 1; it is called
            once per position with a plain object carrying `depth`,
            `heading`, `id`, `rect`, `text`, `open_close`, `rect_num` and
            `href`.
        args: optional dict; every key must be a `str` that is a valid
            identifier. Each item is set as an extra attribute on the
            object passed to `function`. Anything that is not exactly a
            `dict` is treated as empty.

    Raises:
        ValueError: for an invalid key in `args`, or an unsuitable
            `function`.
    '''
    if type(args) is dict:
        for key in args.keys():
            if type(key) is not str or not key.isidentifier():
                raise ValueError(f"invalid key '{key}'")
    else:
        args = {}

    if not callable(function) or function.__code__.co_argcount != 1:
        raise ValueError("callback 'function' must be a callable with exactly one argument")

    def forward(position):
        # Copy the native position into a plain Python object.
        class Position2:
            pass
        copy = Position2()
        copy.depth = position.depth
        copy.heading = position.heading
        copy.id = position.id
        copy.rect = JM_py_from_rect(position.rect)
        copy.text = position.text
        copy.open_close = position.open_close
        copy.rect_num = position.rectangle_num
        copy.href = position.href
        for name, value in args.items():
            setattr(copy, name, value)
        function(copy)

    mupdf.fz_story_positions(self.this, forward)
def place(self, where):
    '''
    Call `mupdf.fz_place_story()` for rect-like `where`.

    Returns:
        `(more, filled)`: the value returned by `fz_place_story()` and the
        filled rectangle converted with `JM_py_from_rect()`.
    '''
    target = JM_rect_from_py(where)
    filled_out = mupdf.FzRect()
    more = mupdf.fz_place_story(self.this, target, filled_out)
    return more, JM_py_from_rect(filled_out)
def reset(self):
    '''Call `mupdf.fz_reset_story()` on the underlying story.'''
    story = self.this
    mupdf.fz_reset_story(story)
def write(self, writer, rectfn, positionfn=None, pagefn=None):
    '''
    Place the story repeatedly into rects supplied by `rectfn` until
    nothing is left, drawing each placement.

    Args:
        writer: object with `begin_page(mediabox)` and `end_page()`; if
            false, the story is only drawn to a null device.
        rectfn: called as `rectfn(rect_num, filled)`, returning
            `(mediabox, rect, ctm)`. A true `mediabox` starts a new page.
        positionfn: if true, called for every element position after each
            placement; the position gets an extra `.page_num` attribute.
        pagefn: if true, called as `pagefn(page_num, mediabox, dev, after)`
            with `after=0` after a page is begun and `after=1` before a
            page is ended.
    '''
    device = None
    page_num = 0
    rect_index = 0
    filled = Rect(0, 0, 0, 0)

    def report_position(position):
        # Add a `.page_num` member to the `ElementPosition` instance.
        position.page_num = page_num
        positionfn(position)

    while True:
        mediabox, rect, ctm = rectfn(rect_index, filled)
        rect_index += 1
        if mediabox:
            # New page.
            page_num += 1
        more, filled = self.place(rect)
        if positionfn:
            self.element_positions(report_position)
        if not writer:
            self.draw(None, ctm)
        else:
            if mediabox:
                # New page: finish the page in progress, if any.
                if device:
                    if pagefn:
                        pagefn(page_num, mediabox, device, 1)
                    writer.end_page()
                device = writer.begin_page(mediabox)
                if pagefn:
                    pagefn(page_num, mediabox, device, 0)
            self.draw(device, ctm)
            if not more:
                if pagefn:
                    pagefn(page_num, mediabox, device, 1)
                writer.end_page()
        if not more:
            break
@staticmethod
def write_stabilized(writer, contentfn, rectfn, user_css=None, em=12, positionfn=None, pagefn=None, archive=None, add_header_ids=True):
    '''
    Lay out content repeatedly until `contentfn` returns the same content
    twice in a row, then write that final layout to `writer`.

    `contentfn(positions)` receives the element positions of the previous
    pass (empty on the first pass) and returns the content for a new
    `Story`. Passes that are not yet stable are laid out without a
    writer. `positionfn`, if true, is called only during the stable pass.
    '''
    positions = []
    previous = None
    # Iterate until stable.
    while True:
        content = contentfn(positions)
        stable = bool(content == previous)
        previous = content
        story = Story(content, user_css, em, archive)
        if add_header_ids:
            story.add_header_ids()
        positions = []

        def collect(position):
            positions.append(position)
            if stable and positionfn:
                positionfn(position)

        story.write(
            writer if stable else None,
            rectfn,
            collect,
            pagefn,
        )
        if stable:
            return
@staticmethod
def write_stabilized_with_links(contentfn, rectfn, user_css=None, em=12, positionfn=None, pagefn=None, archive=None, add_header_ids=True):
    '''
    Run `Story.write_stabilized()` into an in-memory `DocumentWriter`,
    then return the result of `Story.add_pdf_links()` for the written
    stream and the recorded positions.
    '''
    recorded = []

    def record(position):
        recorded.append(position)
        if positionfn:
            positionfn(position)

    buffer = io.BytesIO()
    writer = DocumentWriter(buffer)
    Story.write_stabilized(writer, contentfn, rectfn, user_css, em, record, pagefn, archive, add_header_ids)
    writer.close()
    buffer.seek(0)
    return Story.add_pdf_links(buffer, recorded)
def write_with_links(self, rectfn, positionfn=None, pagefn=None):
    '''
    Run `self.write()` into an in-memory `DocumentWriter`, then return the
    result of `Story.add_pdf_links()` for the written stream and the
    recorded positions.
    '''
    recorded = []

    def record(position):
        recorded.append(position)
        if positionfn:
            positionfn(position)

    buffer = io.BytesIO()
    writer = DocumentWriter(buffer)
    self.write(writer, rectfn, positionfn=record, pagefn=pagefn)
    writer.close()
    buffer.seek(0)
    return Story.add_pdf_links(buffer, recorded)
class FitResult:
    '''
    The result from a `Story.fit*()` method.

    Members:

    `big_enough`:
        `True` if the fit succeeded.
    `filled`:
        From the last call to `Story.place()`.
    `more`:
        `False` if the fit succeeded.
    `numcalls`:
        Number of calls made to `self.place()`.
    `parameter`:
        The successful parameter value, or the largest failing value.
    `rect`:
        The rect created from `parameter`.
    '''
    def __init__(self, big_enough=None, filled=None, more=None, numcalls=None, parameter=None, rect=None):
        self.big_enough, self.filled, self.more = big_enough, filled, more
        self.numcalls, self.parameter, self.rect = numcalls, parameter, rect

    def __repr__(self):
        # One ` name=value` chunk per member, each with a leading space.
        names = ('big_enough', 'filled', 'more', 'numcalls', 'parameter', 'rect')
        return ''.join(f' {name}={getattr(self, name)}' for name in names)
def fit(self, fn, pmin=None, pmax=None, delta=0.001, verbose=False):
    '''
    Finds optimal rect that contains the story `self`.

    Returns a `Story.FitResult` instance.

    On success, the last call to `self.place()` will have been with the
    returned rectangle, so `self.draw()` can be used directly.

    Args:
        :arg fn:
            A callable taking a floating point `parameter` and returning a
            `pymupdf.Rect()`. If the rect is empty, we assume the story
            will not fit and do not call `self.place()`.

            Must guarantee that `self.place()` behaves monotonically when
            given rect `fn(parameter)` as `parameter` increases. This
            usually means that both width and height increase or stay
            unchanged as `parameter` increases.
        :arg pmin:
            Minimum parameter to consider; `None` for -infinity.
        :arg pmax:
            Maximum parameter to consider; `None` for +infinity.
        :arg delta:
            Maximum error in returned `parameter`.
        :arg verbose:
            If true we output diagnostics.
    '''
    def log(text):
        assert verbose
        message(f'fit(): {text}')

    assert pmin is None or isinstance(pmin, (int, float))
    assert pmax is None or isinstance(pmax, (int, float))

    # Mutable search state shared by the helpers below.
    class State:
        pass

    state = State()
    state.pmin = pmin
    state.pmax = pmax
    state.pmin_result = None
    state.pmax_result = None
    state.result = None
    state.numcalls = 0
    if verbose:
        state.pmin0 = pmin
        state.pmax0 = pmax
        log(f'starting. {state.pmin=} {state.pmax=}.')

    self.reset()

    def opposite(p, direction):
        '''
        Returns same sign as `direction`, larger or smaller than `p` if
        direction is positive or negative respectively.
        '''
        if p is None or p == 0:
            return direction
        return 2 * p if direction * p > 0 else -p

    def update(parameter):
        '''
        Evaluates `more, _ = self.place(fn(parameter))`. If `more` is
        false, then `rect` is big enough to contain `self` and we
        set `state.pmax=parameter` and return True. Otherwise we set
        `state.pmin=parameter` and return False.
        '''
        rect = fn(parameter)
        assert isinstance(rect, Rect), f'{type(rect)=} {rect=}'
        if not rect.is_empty:
            more, filled = self.place(rect)
            state.numcalls += 1
            big_enough = not more
            result = Story.FitResult(
                big_enough=big_enough,
                filled=filled,
                more=more,
                numcalls=state.numcalls,
                parameter=parameter,
                rect=rect,
            )
            if verbose:
                log(f'update(): called self.place(): {state.numcalls:>2d}: {more=} {parameter=} {rect=}.')
        else:
            big_enough = False
            result = Story.FitResult(parameter=parameter, numcalls=state.numcalls)
            if verbose:
                log('update(): not calling self.place() because rect is empty.')
        if big_enough:
            state.pmax, state.pmax_result = parameter, result
        else:
            state.pmin, state.pmin_result = parameter, result
        state.last_p = parameter
        return big_enough

    def ret():
        if state.pmax is None:
            result = state.pmin_result or Story.FitResult(numcalls=state.numcalls)
        else:
            if state.last_p != state.pmax:
                # Make the last `self.place()` call match the returned rect.
                if verbose:
                    log('Calling update() with pmax, because was overwritten by later calls.')
                big_enough = update(state.pmax)
                assert big_enough
            result = state.pmax_result
        if verbose:
            log(f'finished. {state.pmin0=} {state.pmax0=} {state.pmax=}: returning {result=}')
        return result

    if state.pmin is None:
        # Find an initial finite pmin value.
        if verbose:
            log('finding pmin.')
        parameter = opposite(state.pmax, -1)
        while update(parameter):
            parameter *= 2
    elif update(state.pmin):
        if verbose:
            log(f'{state.pmin=} is big enough.')
        return ret()

    if state.pmax is None:
        # Find an initial finite pmax value.
        if verbose:
            log('finding pmax.')
        parameter = opposite(state.pmin, +1)
        while not update(parameter):
            parameter *= 2
    elif not update(state.pmax):
        # No solution possible.
        state.pmax = None
        if verbose:
            log(f'No solution possible {state.pmax=}.')
        return ret()

    # Do binary search in pmin..pmax.
    if verbose:
        log(f'doing binary search with {state.pmin=} {state.pmax=}.')
    while not (state.pmax - state.pmin < delta):
        update((state.pmin + state.pmax) / 2)
    return ret()
def fit_scale(self, rect, scale_min=0, scale_max=None, delta=0.001, verbose=False):
    '''
    Finds smallest value `scale` in range `scale_min..scale_max` where
    `scale * rect` is large enough to contain the story `self`.

    Returns a `Story.FitResult` instance.

    :arg rect:
        Rect-like `(x0, y0, x1, y1)`; its top-left corner stays fixed
        while its width and height are multiplied by `scale`.
    :arg scale_min:
        Minimum scale to consider; must be >= 0.
    :arg scale_max:
        Maximum scale to consider, must be >= scale_min or `None` for
        infinite.
    :arg delta:
        Maximum error in returned scale.
    :arg verbose:
        If true we output diagnostics.
    '''
    left, top, right, bottom = rect
    base_width = right - left
    base_height = bottom - top

    def scaled_rect(scale):
        return Rect(left, top, left + scale*base_width, top + scale*base_height)

    return self.fit(scaled_rect, scale_min, scale_max, delta, verbose)
def fit_height(self, width, height_min=0, height_max=None, origin=(0, 0), delta=0.001, verbose=False):
    '''
    Finds smallest height in range `height_min..height_max` where a rect
    with size `(width, height)` is large enough to contain the story
    `self`.

    Returns a `Story.FitResult` instance.

    :arg width:
        width of rect.
    :arg height_min:
        Minimum height to consider; must be >= 0.
    :arg height_max:
        Maximum height to consider, must be >= height_min or `None` for
        infinite.
    :arg origin:
        `(x0, y0)` of rect.
    :arg delta:
        Maximum error in returned height.
    :arg verbose:
        If true we output diagnostics.
    '''
    left, top = origin
    right = left + width

    def rect_for_height(height):
        return Rect(left, top, right, top+height)

    return self.fit(rect_for_height, height_min, height_max, delta, verbose)
def fit_width(self, height, width_min=0, width_max=None, origin=(0, 0), delta=0.001, verbose=False):
    '''
    Finds smallest width in range `width_min..width_max` where a rect with
    size `(width, height)` is large enough to contain the story `self`.

    Returns a `Story.FitResult` instance.

    :arg height:
        height of rect.
    :arg width_min:
        Minimum width to consider; must be >= 0.
    :arg width_max:
        Maximum width to consider, must be >= width_min or `None` for
        infinite.
    :arg origin:
        `(x0, y0)` of rect.
    :arg delta:
        Maximum error in returned width.
    :arg verbose:
        If true we output diagnostics.
    '''
    left, top = origin
    bottom = top + height

    def rect_for_width(width):
        return Rect(left, top, left+width, bottom)

    return self.fit(rect_for_width, width_min, width_max, delta, verbose)
+class TextPage:
def __init__(self, *args):
    """Create a text page.

    Accepted arguments (checked with `args_match()`):
        (mupdf.FzRect): a new `mupdf.FzStextPage` is created for it.
        (mupdf.FzStextPage): the given object is used directly.

    Raises:
        Exception: for any other argument list.
    """
    if args_match(args, mupdf.FzRect):
        stext_page = mupdf.FzStextPage(args[0])
    elif args_match(args, mupdf.FzStextPage):
        stext_page = args[0]
    else:
        raise Exception(f'Unrecognised args: {args}')
    self.this = stext_page
    self.thisown = True
    self.parent = None
def _extractText(self, format_):
    """Render the text page into a buffer and return it as a string.

    `format_` selects the output: 1 = HTML, 3 = XML, 4 = XHTML; any other
    value uses `JM_print_stext_page_as_text()`.
    """
    tpage = self.this
    buf = mupdf.fz_new_buffer(1024)
    # fixme: mupdfwrap.py thinks fz_output is not copyable, possibly
    # because there is no .refs member visible and no fz_keep_output() fn,
    # although there is an fz_drop_output(). So
    # mupdf.fz_new_output_with_buffer() doesn't convert the returned
    # fz_output* into a mupdf.FzOutput.
    out = mupdf.FzOutput(buf)
    if format_ == 1:
        mupdf.fz_print_stext_page_as_html(out, tpage, 0)
    elif format_ == 3:
        mupdf.fz_print_stext_page_as_xml(out, tpage, 0)
    elif format_ == 4:
        mupdf.fz_print_stext_page_as_xhtml(out, tpage, 0)
    else:
        # Plain text is written to the buffer directly, not via `out`.
        JM_print_stext_page_as_text(buf, tpage)
    out.fz_close_output()
    return JM_EscapeStrFromBuffer(buf)
def _getNewBlockList(self, page_dict, raw):
    """Pass `page_dict` and `raw` to `JM_make_textpage_dict()` for this text page."""
    tpage = self.this
    JM_make_textpage_dict(tpage, page_dict, raw)
+def _textpage_dict(self, raw=False):
+page_dict = {"width": self.rect.width, "height": self.rect.height}
+self._getNewBlockList(page_dict, raw)
+return page_dict
def extractBLOCKS(self):
    """Return a list with text block information.

    Each item is a tuple `(x0, y0, x1, y1, text, block_no, block_type)`.
    Blocks whose accumulated rectangle is empty are left out.
    """
    if g_use_extra:
        return extra.extractBLOCKS(self.this)

    tpage = self.this
    page_rect = mupdf.FzRect(tpage.m_internal.mediabox)
    text_buf = mupdf.fz_new_buffer(1024)
    items = []
    for block_no, block in enumerate(tpage):
        block_rect = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY)
        if block.m_internal.type == mupdf.FZ_STEXT_BLOCK_TEXT:
            mupdf.fz_clear_buffer(text_buf)  # start with an empty text buffer
            last_char = 0
            for line in block:
                line_rect = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY)
                for ch in line:
                    char_rect = JM_char_bbox(line, ch)
                    # Skip characters outside the page rect, unless that
                    # rect is infinite.
                    if not (JM_rects_overlap(page_rect, char_rect)
                            or mupdf.fz_is_infinite_rect(page_rect)):
                        continue
                    JM_append_rune(text_buf, ch.m_internal.c)
                    last_char = ch.m_internal.c
                    line_rect = mupdf.fz_union_rect(line_rect, char_rect)
                # End a non-empty line with a line feed unless the last
                # character already was one.
                if last_char != 10 and not mupdf.fz_is_empty_rect(line_rect):
                    mupdf.fz_append_byte(text_buf, 10)
                block_rect = mupdf.fz_union_rect(block_rect, line_rect)
            text = JM_EscapeStrFromBuffer(text_buf)
        elif (JM_rects_overlap(page_rect, block.m_internal.bbox)
                or mupdf.fz_is_infinite_rect(page_rect)):
            img = block.i_image()
            cs = img.colorspace()
            text = "<image: %s, width: %d, height: %d, bpc: %d>" % (
                mupdf.fz_colorspace_name(cs),
                img.w(), img.h(), img.bpc()
            )
            block_rect = mupdf.fz_union_rect(block_rect, mupdf.FzRect(block.m_internal.bbox))
        if mupdf.fz_is_empty_rect(block_rect):
            continue
        items.append((
            block_rect.x0,
            block_rect.y0,
            block_rect.x1,
            block_rect.y1,
            text,
            block_no,
            block.m_internal.type,
        ))
    return items
def extractDICT(self, cb=None, sort=False) -> dict:
    """Return page content as a Python dict of images and text spans.

    If `cb` is given, its `width` and `height` replace the dict's values.
    If `sort` is true, blocks are ordered by bbox bottom, then left edge.
    """
    page = self._textpage_dict(raw=False)
    if cb is not None:
        page["width"], page["height"] = cb.width, cb.height
    if sort:
        def reading_order(block):
            bbox = block["bbox"]
            return (bbox[3], bbox[0])
        page["blocks"].sort(key=reading_order)
    return page
def extractHTML(self) -> str:
    """Return page content as a HTML string."""
    html_format = 1  # format code understood by `_extractText()`
    return self._extractText(html_format)
def extractIMGINFO(self, hashes=0):
    """Return a list with image meta information.

    One dict is produced per non-text block. If `hashes` is true, each
    dict also has a "digest" entry computed with `fz_md5_pixmap2()`.
    """
    infos = []
    for block_no, block in enumerate(self.this):
        if block.m_internal.type == mupdf.FZ_STEXT_BLOCK_TEXT:
            continue
        img = block.i_image()
        stored_size = 0
        has_mask = True if img.mask().m_internal else False
        compr_buff = mupdf.fz_compressed_image_buffer(img)
        if compr_buff.m_internal:
            stored_size = compr_buff.fz_compressed_buffer_size()
        compr_buff = None
        if hashes:
            r = mupdf.FzIrect(FZ_MIN_INF_RECT, FZ_MIN_INF_RECT, FZ_MAX_INF_RECT, FZ_MAX_INF_RECT)
            assert mupdf.fz_is_infinite_irect(r)
            m = mupdf.FzMatrix(img.w(), 0, 0, img.h(), 0, 0)
            pix, _w, _h = mupdf.fz_get_pixmap_from_image(img, r, m)
            digest = bytes(mupdf.fz_md5_pixmap2(pix))
        if stored_size == 0:
            # No compressed size was found: use w * h * n instead.
            stored_size = img.w() * img.h() * img.n()
        cs = mupdf.FzColorspace(mupdf.ll_fz_keep_colorspace(img.m_internal.colorspace))
        info = {
            dictkey_number: block_no,
            dictkey_bbox: JM_py_from_rect(block.m_internal.bbox),
            dictkey_matrix: JM_py_from_matrix(block.i_transform()),
            dictkey_width: img.w(),
            dictkey_height: img.h(),
            dictkey_colorspace: mupdf.fz_colorspace_n(cs),
            dictkey_cs_name: mupdf.fz_colorspace_name(cs),
            dictkey_xres: img.xres(),
            dictkey_yres: img.yres(),
            dictkey_bpc: img.bpc(),
            dictkey_size: stored_size,
        }
        if hashes:
            info["digest"] = digest
        info["has-mask"] = has_mask
        infos.append(info)
    return infos
def extractJSON(self, cb=None, sort=False) -> str:
    """Return 'extractDICT' converted to JSON format.

    `bytes` and `bytearray` values are emitted as base64 text. `cb` and
    `sort` behave as in `extractDICT()`.
    """
    import base64
    import json

    def encode_binary(obj):
        # Any type other than bytes / bytearray yields None here.
        if type(obj) in (bytes, bytearray):
            return base64.b64encode(obj).decode()
        return None

    page = self._textpage_dict(raw=False)
    if cb is not None:
        page["width"], page["height"] = cb.width, cb.height
    if sort:
        page["blocks"].sort(key=lambda b: (b["bbox"][3], b["bbox"][0]))
    return json.dumps(page, separators=(",", ":"), default=encode_binary, indent=1)
def extractRAWDICT(self, cb=None, sort=False) -> dict:
    """Return page content as a Python dict of images and text characters.

    If `cb` is given, its `width` and `height` replace the dict's values.
    If `sort` is true, blocks are ordered by bbox bottom, then left edge.
    """
    page = self._textpage_dict(raw=True)
    if cb is not None:
        page["width"], page["height"] = cb.width, cb.height
    if sort:
        def reading_order(block):
            bbox = block["bbox"]
            return (bbox[3], bbox[0])
        page["blocks"].sort(key=reading_order)
    return page
def extractRAWJSON(self, cb=None, sort=False) -> str:
    """Return 'extractRAWDICT' converted to JSON format.

    `bytes` and `bytearray` values are emitted as base64 text. `cb` and
    `sort` behave as in `extractRAWDICT()`.
    """
    import base64
    import json

    def encode_binary(obj):
        # Any type other than bytes / bytearray yields None here.
        if type(obj) in (bytes, bytearray):
            return base64.b64encode(obj).decode()
        return None

    page = self._textpage_dict(raw=True)
    if cb is not None:
        page["width"], page["height"] = cb.width, cb.height
    if sort:
        page["blocks"].sort(key=lambda b: (b["bbox"][3], b["bbox"][0]))
    return json.dumps(page, separators=(",", ":"), default=encode_binary, indent=1)
def extractSelection(self, pointa, pointb):
    """Return the result of `mupdf.fz_copy_selection()` for the two
    point-like arguments."""
    start = JM_point_from_py(pointa)
    end = JM_point_from_py(pointb)
    return mupdf.fz_copy_selection(self.this, start, end, 0)
def extractText(self, sort=False) -> str:
    """Return simple, bare text on the page.

    With `sort` true, the texts of `extractBLOCKS()` are joined after
    ordering the blocks by bottom coordinate, then left coordinate.
    """
    if sort:
        ordered = sorted(self.extractBLOCKS(), key=lambda b: (b[3], b[0]))
        return "".join(block[4] for block in ordered)
    return self._extractText(0)
def extractTextbox(self, rect):
    """Return the text that `JM_copy_rectangle()` yields for rect-like
    `rect`, decoded with `PyUnicode_DecodeRawUnicodeEscape()`."""
    tpage = self.this
    assert isinstance(tpage, mupdf.FzStextPage)
    raw_text = JM_copy_rectangle(tpage, JM_rect_from_py(rect))
    return PyUnicode_DecodeRawUnicodeEscape(raw_text)
def extractWORDS(self, delimiters=None):
    """Return a list with text word information.

    Words are collected per line of each text block; the entries
    themselves are created by `JM_append_word()`. A word ends at a
    delimiter character (see `JM_is_word_delimiter()`) or where the
    right-to-left property of the characters changes.
    """
    if g_use_extra:
        return extra.extractWORDS(self.this, delimiters)

    tpage = self.this
    page_rect = mupdf.FzRect(tpage.m_internal.mediabox)
    word_rect = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY)  # bbox of current word
    word_buf = mupdf.fz_new_buffer(64)
    char_count = 0
    prev_rtl = 0
    words = []
    for block_no, block in enumerate(tpage):
        if block.m_internal.type != mupdf.FZ_STEXT_BLOCK_TEXT:
            continue
        for line_no, line in enumerate(block):
            word_no = 0                     # word counter per line
            mupdf.fz_clear_buffer(word_buf) # reset word buffer
            char_count = 0                  # reset char counter
            for ch in line:
                char_rect = JM_char_bbox(line, ch)
                # Skip characters outside the page rect, unless that rect
                # is infinite.
                if not (JM_rects_overlap(page_rect, char_rect)
                        or mupdf.fz_is_infinite_rect(page_rect)):
                    continue
                is_delimiter = JM_is_word_delimiter(ch.m_internal.c, delimiters)
                is_rtl = JM_is_rtl_char(ch.m_internal.c)
                if is_delimiter or is_rtl != prev_rtl:
                    if char_count == 0 and is_delimiter:
                        continue    # skip delimiters at line start
                    if not mupdf.fz_is_empty_rect(word_rect):
                        word_no, word_rect = JM_append_word(words, word_buf, word_rect, block_no, line_no, word_no)
                    mupdf.fz_clear_buffer(word_buf)
                    char_count = 0  # reset char counter
                    if is_delimiter:
                        continue
                # Append one unicode character to the word.
                JM_append_rune(word_buf, ch.m_internal.c)
                prev_rtl = is_rtl
                char_count += 1
                # Enlarge word bbox.
                word_rect = mupdf.fz_union_rect(word_rect, JM_char_bbox(line, ch))
            # Store the word still pending at the end of the line.
            if char_count and not mupdf.fz_is_empty_rect(word_rect):
                word_no, word_rect = JM_append_word(words, word_buf, word_rect, block_no, line_no, word_no)
                char_count = 0
    return words
def extractXHTML(self) -> str:
    """Return page content as a XHTML string."""
    xhtml_format = 4  # format code understood by `_extractText()`
    return self._extractText(xhtml_format)
def extractXML(self) -> str:
    """Return page content as a XML string."""
    xml_format = 3  # format code understood by `_extractText()`
    return self._extractText(xml_format)
def poolsize(self):
    """TextPage current poolsize."""
    wrapper = mupdf.Pool(self.this.m_internal.pool)
    result = mupdf.fz_pool_size(wrapper)
    # Ensure that the wrapper's destructor does not free the pool.
    wrapper.m_internal = None
    return result
@property
def rect(self):
    """Page rectangle."""
    mediabox = self.this.m_internal.mediabox
    return Rect(JM_py_from_rect(mediabox))
def search(self, needle, hit_max=0, quads=1):
    """Locate 'needle' returning rects or quads.

    The hits of `JM_search_stext_page()` are converted to `Quad` objects
    if `quads` is true, else to their rectangles. In the latter case,
    neighbouring rectangles with the same bottom coordinate and a
    non-empty intersection are joined. `hit_max` is not used here.
    """
    hits = JM_search_stext_page(self.this, needle)
    if not hits:
        return hits
    # Change entries to quads or rects.
    for index, raw in enumerate(hits):
        quad = Quad(raw)
        hits[index] = quad if quads else quad.rect
    if quads:
        return hits
    # Join overlapping rects on the same line.
    index = 0
    while index < len(hits) - 1:
        first, second = hits[index], hits[index + 1]
        if first.y1 != second.y1 or (first & second).is_empty:
            index += 1  # no overlap on same line
        else:
            hits[index] = first | second  # join rectangles
            del hits[index + 1]  # remove the second one
    return hits
+extractTEXT = extractText  # upper-case alias of extractText
+class TextWriter:
def __init__(self, page_rect, opacity=1, color=None):
    """Stores text spans for later output on compatible PDF pages.

    Args:
        page_rect: rect-like; stored as `self.rect`.
        opacity: default opacity used by `write_text()`.
        color: default color used by `write_text()`.
    """
    self.this = mupdf.fz_new_text()
    self.opacity = opacity
    self.color = color
    self.rect = Rect(page_rect)
    # Flip the y axis and shift by the page height; `ictm` is the inverse.
    flip = Matrix(1, 0, 0, -1, 0, self.rect.height)
    self.ctm = flip
    self.ictm = ~flip
    last_point = Point()
    last_point.__doc__ = "Position following last text insertion."
    self.last_point = last_point
    text_rect = Rect()
    text_rect.__doc__ = "Accumulated area of text spans."
    self.text_rect = text_rect
    self.used_fonts = set()
    self.thisown = True
@property
def _bbox(self):
    """`Rect` of the stored text, as bounded by `mupdf.fz_bound_text()`
    with no stroke state and a default `FzMatrix()`."""
    bounds = mupdf.fz_bound_text(self.this, mupdf.FzStrokeState(None), mupdf.FzMatrix())
    return Rect(JM_py_from_rect(bounds))
def append(self, pos, text, font=None, fontsize=11, language=None, right_to_left=0, small_caps=0):
    """Store 'text' at point 'pos' using 'font' and 'fontsize'.

    Args:
        pos: point-like start position.
        text: the string to store.
        font: a `Font`; `Font("helv")` is used if None.
        fontsize: font size.
        language: passed to `mupdf.fz_text_language_from_string()`.
        right_to_left: if true, `text` is passed through `clean_rtl()`
            and then reversed before being stored.
        small_caps: if not 0, `JM_show_string_cs()` is used instead of
            `mupdf.fz_show_string()`.

    Returns:
        Tuple `(self.text_rect, self.last_point)`.

    Raises:
        ValueError: if `font.is_writable` is false.
    """
    pos = Point(pos) * self.ictm
    if font is None:
        font = Font("helv")
    if not font.is_writable:
        if 0:
            # Disabled diagnostics.
            log( '{font.this.m_internal.name=}')
            log( '{font.this.m_internal.t3matrix=}')
            log( '{font.this.m_internal.bbox=}')
            log( '{font.this.m_internal.glyph_count=}')
            log( '{font.this.m_internal.use_glyph_bbox=}')
            log( '{font.this.m_internal.width_count=}')
            log( '{font.this.m_internal.width_default=}')
            log( '{font.this.m_internal.has_digest=}')
            log( 'Unsupported font {font.name=}')
            if mupdf_cppyy:
                import cppyy
                log( f'Unsupported font {cppyy.gbl.mupdf_font_name(font.this.m_internal)=}')
        raise ValueError("Unsupported font '%s'." % font.name)
    if right_to_left:
        # The reordered string is stored as ordinary left-to-right text.
        text = "".join(reversed(self.clean_rtl(text)))
        right_to_left = 0

    lang = mupdf.fz_text_language_from_string(language)
    start = JM_point_from_py(pos)
    trm = mupdf.fz_make_matrix(fontsize, 0, 0, fontsize, start.x, start.y)
    markup_dir = 0
    wmode = 0
    show = mupdf.fz_show_string if small_caps == 0 else JM_show_string_cs
    trm = show(self.this, font.this, trm, text, wmode, right_to_left, markup_dir, lang)

    # The last two entries of the returned matrix give the new position.
    self.last_point = Point(JM_py_from_matrix(trm)[-2:]) * self.ctm
    self.text_rect = self._bbox * self.ctm
    outcome = self.text_rect, self.last_point
    if font.flags["mono"] == 1:
        # Remembered so that `write_text()` can call `repair_mono_font()`.
        self.used_fonts.add(font)
    return outcome
def appendv(self, pos, text, font=None, fontsize=11, language=None, small_caps=False):
    """Store 'text' one character per line, going down from 'pos'.

    Each character is stored with `append()`; after every character the
    position moves down by `fontsize * 1.2`.

    Args:
        pos: point-like start position. It is copied first, so the
            caller's object is left unchanged and sequences are accepted.
        text: the string to store.
        font, fontsize, language, small_caps: passed on to `append()`.

    Returns:
        Tuple `(self.text_rect, self.last_point)`.
    """
    lheight = fontsize * 1.2
    # Work on a private copy: the loop below modifies the y coordinate.
    pos = Point(pos)
    for c in text:
        self.append(pos, c, font=font, fontsize=fontsize,
                language=language, small_caps=small_caps)
        pos.y += lheight
    return self.text_rect, self.last_point
def clean_rtl(self, text):
    """Revert the sequence of Latin text parts.

    Text with right-to-left writing direction (Arabic, Hebrew) often
    contains Latin parts, which are written in left-to-right: numbers,
    names, etc. For output as PDF text we need *everything* in
    right-to-left.
    E.g. an input like "<arabic> ABCDE FG HIJ <arabic> KL <arabic>" will be
    converted to "<arabic> JIH GF EDCBA <arabic> LK <arabic>". The Arabic
    parts remain untouched.

    A word counts as Latin if it has at least two characters and no code
    point above 255. Every run of adjacent Latin words is handled,
    including a run at the very end of the text.

    Args:
        text: str
    Returns:
        Massaged string.
    """
    if not text:
        return text

    def is_latin(word):
        return len(word) >= 2 and max(map(ord, word)) <= 255

    # Split into words at space boundaries.
    words = text.split(" ")
    count = len(words)
    start = 0
    while start < count:
        if not is_latin(words[start]):
            start += 1
            continue
        # Find the end of this run of adjacent Latin words.
        stop = start
        while stop < count and is_latin(words[stop]):
            stop += 1
        # Revert the characters of each word and the order of the words.
        words[start:stop] = [word[::-1] for word in reversed(words[start:stop])]
        start = stop
    return " ".join(words)
def write_text(self, page, color=None, opacity=-1, overlay=1, morph=None, matrix=None, render_mode=0, oc=0):
    """Write the text to a PDF page having the TextWriter's page size.

    Args:
        page: a PDF page having same size.
        color: override text color.
        opacity: override transparency.
        overlay: put in foreground or background.
        morph: tuple(Point, Matrix), apply a matrix with a fixpoint.
        matrix: Matrix to be used instead of 'morph' argument.
        render_mode: (int) PDF render mode operator 'Tr'.
        oc: passed to `page._get_optional_content()`; if that returns a
            value, the text is wrapped in a `/OC ... BDC` / `EMC` pair.

    Raises:
        ValueError: if the page rect differs from the writer's rect, if
            `morph` is not a (Point, Matrix) pair, or if both `morph` and
            `matrix` are given.
    """
    CheckParent(page)
    if abs(self.rect - page.rect) > 1e-3:
        raise ValueError("incompatible page rect")
    if morph is not None and (
        type(morph) not in (tuple, list)
        or type(morph[0]) is not Point
        or type(morph[1]) is not Matrix
    ):
        raise ValueError("morph must be (Point, Matrix) or None")
    if matrix is not None and morph is not None:
        raise ValueError("only one of matrix, morph is allowed")
    if getattr(opacity, "__float__", None) is None or opacity == -1:
        opacity = self.opacity
    if color is None:
        color = self.color

    # Render the stored text into a temporary content buffer.
    pdfpage = page._pdf_page()
    alpha = opacity if (opacity >= 0 and opacity < 1) else 1
    ncol = 1
    dev_color = [0, 0, 0, 0]
    if color:
        ncol, dev_color = JM_color_FromSequence(color)
    if ncol == 3:
        colorspace = mupdf.fz_device_rgb()
    elif ncol == 4:
        colorspace = mupdf.fz_device_cmyk()
    else:
        colorspace = mupdf.fz_device_gray()

    resources = mupdf.pdf_new_dict(pdfpage.doc(), 5)
    contents = mupdf.fz_new_buffer(1024)
    dev = mupdf.pdf_new_pdf_device(pdfpage.doc(), mupdf.FzMatrix(), resources, contents)
    mupdf.fz_fill_text(
        dev,
        self.this,
        mupdf.FzMatrix(),
        colorspace,
        dev_color,
        alpha,
        mupdf.FzColorParams(mupdf.fz_default_color_params),
    )
    mupdf.fz_close_device(dev)

    # Copy generated resources into the one of the page.
    max_alp, max_font = JM_merge_resources(pdfpage, resources)
    source_lines = JM_EscapeStrFromBuffer(contents).splitlines()

    optcont = page._get_optional_content(oc)
    if optcont is not None:
        bdc = "/OC /%s BDC" % optcont
        emc = "EMC"
    else:
        bdc = emc = ""

    out_lines = ["q"]
    if bdc:
        out_lines.append(bdc)

    cb = page.cropbox_position
    rotation_shift = 0
    if page.rotation in (90, 270):
        rotation_shift = page.rect.height - page.rect.width
    mb = page.mediabox
    if bool(cb) or mb.y0 != 0 or rotation_shift != 0:
        offset = _format_g((cb.x, cb.y + mb.y0 - rotation_shift))
        out_lines.append(f"1 0 0 1 {offset} cm")

    if morph:
        # Build the matrix that applies morph[1] around the fixpoint.
        p = morph[0] * self.ictm
        shift = Matrix(1, 1).pretranslate(p.x, p.y)
        matrix = ~shift * morph[1] * shift
    if morph or matrix:
        out_lines.append(_format_g(JM_TUPLE(matrix)) + " cm")

    for line in source_lines:
        if line.endswith(" cm"):
            continue  # generated matrix lines are dropped
        if line == "BT":
            out_lines.append(line)
            out_lines.append("%i Tr" % render_mode)
            continue
        if line.endswith(" gs"):
            # Renumber the alpha state by the page's current maximum.
            alp = int(line.split()[0][4:]) + max_alp
            line = "/Alp%i gs" % alp
        elif line.endswith(" Tf"):
            parts = line.split()
            fsize = float(parts[1])
            width = fsize * 0.05 if render_mode != 0 else 1
            out_lines.append(_format_g(width) + " w")
            # Renumber the font by the page's current maximum.
            font_no = int(parts[0][2:]) + max_font
            line = " ".join(["/F%i" % font_no] + parts[1:])
        elif line.endswith(" rg"):
            # Color lines are emitted twice: upper-cased, then unchanged.
            out_lines.append(line.replace("rg", "RG"))
        elif line.endswith(" g"):
            out_lines.append(line.replace(" g", " G"))
        elif line.endswith(" k"):
            out_lines.append(line.replace(" k", " K"))
        out_lines.append(line)

    if emc:
        out_lines.append(emc)
    out_lines.append("Q\n")
    content = "\n".join(out_lines).encode("utf-8")
    TOOLS._insert_contents(page, content, overlay=overlay)
    for font in self.used_fonts:
        repair_mono_font(page, font)
    return None
+class IRect:
+"""
+IRect() - all zeros
+IRect(x0, y0, x1, y1) - 4 coordinates
+IRect(top-left, x1, y1) - point and 2 coordinates
+IRect(x0, y0, bottom-right) - 2 coordinates and point
+IRect(top-left, bottom-right) - 2 points
+IRect(sequ) - new from sequence or rect-like
+"""
def __add__(self, p):
    """Return `Rect.__add__(self, p)` after calling `.round()` on it."""
    total = Rect.__add__(self, p)
    return total.round()
def __and__(self, x):
    """Return `Rect.__and__(self, x)` after calling `.round()` on it."""
    common = Rect.__and__(self, x)
    return common.round()
def __contains__(self, x):
    """Containment test, delegated to `Rect.__contains__()`."""
    inside = Rect.__contains__(self, x)
    return inside
+def __eq__(self, r):
+if not hasattr(r, "__len__"):
+return False
+return len(r) == 4 and self.x0 == r[0] and self.y0 == r[1] and self.x1 == r[2] and self.y1 == r[3]
+def __getitem__(self, i):
+return (self.x0, self.y0, self.x1, self.y1)[i]
+def __hash__(self):
+return hash(tuple(self))
def __init__(self, *args, p0=None, p1=None, x0=None, y0=None, x1=None, y1=None):
    """Set x0, y0, x1, y1 from the four values returned by
    `util_make_irect()` for the given arguments."""
    coords = util_make_irect(*args, p0=p0, p1=p1, x0=x0, y0=y0, x1=x1, y1=y1)
    self.x0, self.y0, self.x1, self.y1 = coords
+def __len__(self):
+return 4
+def __mul__(self, m):
+return Rect.__mul__(self, m).round()
+def __neg__(self):
+return IRect(-self.x0, -self.y0, -self.x1, -self.y1)
+def __or__(self, x):
+return Rect.__or__(self, x).round()
+def __pos__(self):
+return IRect(self)
+def __repr__(self):
+return "IRect" + str(tuple(self))
+def __setitem__(self, i, v):
+v = int(v)
+if   i == 0: self.x0 = v
+elif i == 1: self.y0 = v
+elif i == 2: self.x1 = v
+elif i == 3: self.y1 = v
+else:
+raise IndexError("index out of range")
+return None
+def __sub__(self, p):
+return Rect.__sub__(self, p).round()
+def __truediv__(self, m):
+return Rect.__truediv__(self, m).round()
+@property
+def bottom_left(self):
+"""Bottom-left corner."""
+return Point(self.x0, self.y1)
+@property
+def bottom_right(self):
+"""Bottom-right corner."""
+return Point(self.x1, self.y1)
+@property
+def height(self):
+return max(0, self.y1 - self.y0)
+def contains(self, x):
+"""Check if x is in the rectangle."""
+return self.__contains__(x)
+def include_point(self, p):
+"""Extend rectangle to include point p."""
+rect = self.rect.include_point(p)
+return rect.irect
+def include_rect(self, r):
+"""Extend rectangle to include rectangle r."""
+rect = self.rect.include_rect(r)
+return rect.irect
+def intersect(self, r):
+"""Restrict rectangle to intersection with rectangle r."""
+return Rect.intersect(self, r).round()
+def intersects(self, x):
+return Rect.intersects(self, x)
+@property
+def is_empty(self):
+"""True if rectangle area is empty."""
+return self.x0 >= self.x1 or self.y0 >= self.y1
+@property
+def is_infinite(self):
+"""True if rectangle is infinite."""
+return self.x0 == self.y0 == FZ_MIN_INF_RECT and self.x1 == self.y1 == FZ_MAX_INF_RECT
+@property
+def is_valid(self):
+"""True if rectangle is valid."""
+return self.x0 <= self.x1 and self.y0 <= self.y1
+def morph(self, p, m):
+"""Morph with matrix-like m and point-like p.
+Returns a new quad."""
+if self.is_infinite:
+return INFINITE_QUAD()
+return self.quad.morph(p, m)
+def norm(self):
+return math.sqrt(sum([c*c for c in self]))
+def normalize(self):
+"""Replace rectangle with its valid version."""
+if self.x1 < self.x0:
+self.x0, self.x1 = self.x1, self.x0
+if self.y1 < self.y0:
+self.y0, self.y1 = self.y1, self.y0
+return self
+@property
+def quad(self):
+"""Return Quad version of rectangle."""
+return Quad(self.tl, self.tr, self.bl, self.br)
+@property
+def rect(self):
+return Rect(self)
+@property
+def top_left(self):
+"""Top-left corner."""
+return Point(self.x0, self.y0)
+@property
+def top_right(self):
+"""Top-right corner."""
+return Point(self.x1, self.y0)
+def torect(self, r):
+"""Return matrix that converts to target rect."""
+r = Rect(r)
+if self.is_infinite or self.is_empty or r.is_infinite or r.is_empty:
+raise ValueError("rectangles must be finite and not empty")
+return (
+Matrix(1, 0, 0, 1, -self.x0, -self.y0)
+* Matrix(r.width / self.width, r.height / self.height)
+* Matrix(1, 0, 0, 1, r.x0, r.y0)
+)
+def transform(self, m):
+return Rect.transform(self, m).round()
+@property
+def width(self):
+return max(0, self.x1 - self.x0)
+br = bottom_right
+bl = bottom_left
+tl = top_left
+tr = top_right
# Data
#
# Re-export the simple `PDF_*` and `UCDN_SCRIPT_*` values of the mupdf
# bindings as attributes of this module.
#
# We read `mupdf.__dict__` directly; an earlier variant based on
# `inspect.getmembers(mupdf)` was noted as slower (e.g. 0.019 instead of
# 0.004) because of importing inspect.
#
_self = sys.modules[__name__]
for _name, _value in mupdf.__dict__.items():
    if not _name.startswith(('PDF_', 'UCDN_SCRIPT_')):
        continue
    if _name.startswith('PDF_ENUM_NAME_'):
        # Not a simple enum.
        continue
    setattr(_self, _name, _value)

# This is a macro so not preserved in mupdf C++/Python bindings.
#
PDF_SIGNATURE_DEFAULT_APPEARANCE = (
    0
    | mupdf.PDF_SIGNATURE_SHOW_LABELS
    | mupdf.PDF_SIGNATURE_SHOW_DN
    | mupdf.PDF_SIGNATURE_SHOW_DATE
    | mupdf.PDF_SIGNATURE_SHOW_TEXT_NAME
    | mupdf.PDF_SIGNATURE_SHOW_GRAPHIC_NAME
    | mupdf.PDF_SIGNATURE_SHOW_LOGO
)

assert mupdf.UCDN_EAST_ASIAN_H == 1

# Flake8 incorrectly fails next two lines because we've dynamically added
# items to self.
assert PDF_TX_FIELD_IS_MULTILINE == mupdf.PDF_TX_FIELD_IS_MULTILINE # noqa: F821
assert UCDN_SCRIPT_ADLAM == mupdf.UCDN_SCRIPT_ADLAM # noqa: F821

# Do not leave the loop helpers behind as module attributes.
del _self, _name, _value
+AnyType = typing.Any
+Base14_fontnames = (
+"Courier",
+"Courier-Oblique",
+"Courier-Bold",
+"Courier-BoldOblique",
+"Helvetica",
+"Helvetica-Oblique",
+"Helvetica-Bold",
+"Helvetica-BoldOblique",
+"Times-Roman",
+"Times-Italic",
+"Times-Bold",
+"Times-BoldItalic",
+"Symbol",
+"ZapfDingbats",
+)
+Base14_fontdict = {}
+for f in Base14_fontnames:
+Base14_fontdict[f.lower()] = f
+Base14_fontdict["helv"] = "Helvetica"
+Base14_fontdict["heit"] = "Helvetica-Oblique"
+Base14_fontdict["hebo"] = "Helvetica-Bold"
+Base14_fontdict["hebi"] = "Helvetica-BoldOblique"
+Base14_fontdict["cour"] = "Courier"
+Base14_fontdict["coit"] = "Courier-Oblique"
+Base14_fontdict["cobo"] = "Courier-Bold"
+Base14_fontdict["cobi"] = "Courier-BoldOblique"
+Base14_fontdict["tiro"] = "Times-Roman"
+Base14_fontdict["tibo"] = "Times-Bold"
+Base14_fontdict["tiit"] = "Times-Italic"
+Base14_fontdict["tibi"] = "Times-BoldItalic"
+Base14_fontdict["symb"] = "Symbol"
+Base14_fontdict["zadb"] = "ZapfDingbats"
+EPSILON = 1e-5
+FLT_EPSILON = 1e-5
+# largest 32bit integers surviving C float conversion roundtrips
+# used by MuPDF to define infinite rectangles
+FZ_MIN_INF_RECT = -0x80000000
+FZ_MAX_INF_RECT = 0x7fffff80
+JM_annot_id_stem = "fitz"
+JM_mupdf_warnings_store = []
+JM_mupdf_show_errors = 1
+JM_mupdf_show_warnings = 0
+# ------------------------------------------------------------------------------
+# Image recompression constants
+# ------------------------------------------------------------------------------
+FZ_RECOMPRESS_NEVER = mupdf.FZ_RECOMPRESS_NEVER
+FZ_RECOMPRESS_SAME = mupdf.FZ_RECOMPRESS_SAME
+FZ_RECOMPRESS_LOSSLESS = mupdf.FZ_RECOMPRESS_LOSSLESS
+FZ_RECOMPRESS_JPEG = mupdf.FZ_RECOMPRESS_JPEG
+FZ_RECOMPRESS_J2K = mupdf.FZ_RECOMPRESS_J2K
+FZ_RECOMPRESS_FAX = mupdf.FZ_RECOMPRESS_FAX
+FZ_SUBSAMPLE_AVERAGE = mupdf.FZ_SUBSAMPLE_AVERAGE
+FZ_SUBSAMPLE_BICUBIC = mupdf.FZ_SUBSAMPLE_BICUBIC
+# ------------------------------------------------------------------------------
+# Various PDF Optional Content Flags
+# ------------------------------------------------------------------------------
+PDF_OC_ON = 0
+PDF_OC_TOGGLE = 1
+PDF_OC_OFF = 2
+# ------------------------------------------------------------------------------
+# link kinds and link flags
+# ------------------------------------------------------------------------------
+LINK_NONE = 0
+LINK_GOTO = 1
+LINK_URI = 2
+LINK_LAUNCH = 3
+LINK_NAMED = 4
+LINK_GOTOR = 5
+LINK_FLAG_L_VALID = 1
+LINK_FLAG_T_VALID = 2
+LINK_FLAG_R_VALID = 4
+LINK_FLAG_B_VALID = 8
+LINK_FLAG_FIT_H = 16
+LINK_FLAG_FIT_V = 32
+LINK_FLAG_R_IS_ZOOM = 64
+SigFlag_SignaturesExist = 1
+SigFlag_AppendOnly = 2
+STAMP_Approved = 0
+STAMP_AsIs = 1
+STAMP_Confidential = 2
+STAMP_Departmental = 3
+STAMP_Experimental = 4
+STAMP_Expired = 5
+STAMP_Final = 6
+STAMP_ForComment = 7
+STAMP_ForPublicRelease = 8
+STAMP_NotApproved = 9
+STAMP_NotForPublicRelease = 10
+STAMP_Sold = 11
+STAMP_TopSecret = 12
+STAMP_Draft = 13
+TEXT_ALIGN_LEFT = 0
+TEXT_ALIGN_CENTER = 1
+TEXT_ALIGN_RIGHT = 2
+TEXT_ALIGN_JUSTIFY = 3
+TEXT_FONT_SUPERSCRIPT = 1
+TEXT_FONT_ITALIC = 2
+TEXT_FONT_SERIFED = 4
+TEXT_FONT_MONOSPACED = 8
+TEXT_FONT_BOLD = 16
+TEXT_OUTPUT_TEXT = 0
+TEXT_OUTPUT_HTML = 1
+TEXT_OUTPUT_JSON = 2
+TEXT_OUTPUT_XML = 3
+TEXT_OUTPUT_XHTML = 4
+TEXT_PRESERVE_LIGATURES = mupdf.FZ_STEXT_PRESERVE_LIGATURES
+TEXT_PRESERVE_WHITESPACE = mupdf.FZ_STEXT_PRESERVE_WHITESPACE
+TEXT_PRESERVE_IMAGES = mupdf.FZ_STEXT_PRESERVE_IMAGES
+TEXT_INHIBIT_SPACES = mupdf.FZ_STEXT_INHIBIT_SPACES
+TEXT_DEHYPHENATE = mupdf.FZ_STEXT_DEHYPHENATE
+TEXT_PRESERVE_SPANS = mupdf.FZ_STEXT_PRESERVE_SPANS
+TEXT_MEDIABOX_CLIP = mupdf.FZ_STEXT_MEDIABOX_CLIP
+TEXT_USE_CID_FOR_UNKNOWN_UNICODE = mupdf.FZ_STEXT_USE_CID_FOR_UNKNOWN_UNICODE
+TEXT_COLLECT_STRUCTURE = mupdf.FZ_STEXT_COLLECT_STRUCTURE
+TEXT_ACCURATE_BBOXES = mupdf.FZ_STEXT_ACCURATE_BBOXES
+TEXT_COLLECT_VECTORS = mupdf.FZ_STEXT_COLLECT_VECTORS
+TEXT_IGNORE_ACTUALTEXT = mupdf.FZ_STEXT_IGNORE_ACTUALTEXT
+TEXT_SEGMENT = mupdf.FZ_STEXT_SEGMENT
+if mupdf_version_tuple >= (1, 26):
+TEXT_PARAGRAPH_BREAK = mupdf.FZ_STEXT_PARAGRAPH_BREAK
+TEXT_TABLE_HUNT = mupdf.FZ_STEXT_TABLE_HUNT
+TEXT_COLLECT_STYLES = mupdf.FZ_STEXT_COLLECT_STYLES
+TEXT_USE_GID_FOR_UNKNOWN_UNICODE = mupdf.FZ_STEXT_USE_GID_FOR_UNKNOWN_UNICODE
+TEXT_CLIP_RECT = mupdf.FZ_STEXT_CLIP_RECT
+TEXT_ACCURATE_ASCENDERS = mupdf.FZ_STEXT_ACCURATE_ASCENDERS
+TEXT_ACCURATE_SIDE_BEARINGS = mupdf.FZ_STEXT_ACCURATE_SIDE_BEARINGS
+# 2025-05-07: Non-standard names preserved for backwards compatibility.
+TEXT_STEXT_SEGMENT = TEXT_SEGMENT
+TEXT_CID_FOR_UNKNOWN_UNICODE = TEXT_USE_CID_FOR_UNKNOWN_UNICODE
+TEXTFLAGS_WORDS = (0
+| TEXT_PRESERVE_LIGATURES
+| TEXT_PRESERVE_WHITESPACE
+| TEXT_MEDIABOX_CLIP
+| TEXT_USE_CID_FOR_UNKNOWN_UNICODE
+)
+TEXTFLAGS_BLOCKS = (0
+| TEXT_PRESERVE_LIGATURES
+| TEXT_PRESERVE_WHITESPACE
+| TEXT_MEDIABOX_CLIP
+| TEXT_USE_CID_FOR_UNKNOWN_UNICODE
+)
+TEXTFLAGS_DICT = (0
+| TEXT_PRESERVE_LIGATURES
+| TEXT_PRESERVE_WHITESPACE
+| TEXT_MEDIABOX_CLIP
+| TEXT_PRESERVE_IMAGES
+| TEXT_USE_CID_FOR_UNKNOWN_UNICODE
+)
+TEXTFLAGS_RAWDICT = TEXTFLAGS_DICT
+TEXTFLAGS_SEARCH = (0
+| TEXT_PRESERVE_WHITESPACE
+| TEXT_MEDIABOX_CLIP
+| TEXT_DEHYPHENATE
+| TEXT_USE_CID_FOR_UNKNOWN_UNICODE
+)
+TEXTFLAGS_HTML = (0
+| TEXT_PRESERVE_LIGATURES
+| TEXT_PRESERVE_WHITESPACE
+| TEXT_MEDIABOX_CLIP
+| TEXT_PRESERVE_IMAGES
+| TEXT_USE_CID_FOR_UNKNOWN_UNICODE
+)
+TEXTFLAGS_XHTML = (0
+| TEXT_PRESERVE_LIGATURES
+| TEXT_PRESERVE_WHITESPACE
+| TEXT_MEDIABOX_CLIP
+| TEXT_PRESERVE_IMAGES
+| TEXT_USE_CID_FOR_UNKNOWN_UNICODE
+)
+TEXTFLAGS_XML = (0
+| TEXT_PRESERVE_LIGATURES
+| TEXT_PRESERVE_WHITESPACE
+| TEXT_MEDIABOX_CLIP
+| TEXT_USE_CID_FOR_UNKNOWN_UNICODE
+)
+TEXTFLAGS_TEXT = (0
+| TEXT_PRESERVE_LIGATURES
+| TEXT_PRESERVE_WHITESPACE
+| TEXT_MEDIABOX_CLIP
+| TEXT_USE_CID_FOR_UNKNOWN_UNICODE
+)
+# Simple text encoding options
+TEXT_ENCODING_LATIN = 0
+TEXT_ENCODING_GREEK = 1
+TEXT_ENCODING_CYRILLIC = 2
+TOOLS_JM_UNIQUE_ID = 0
+# colorspace identifiers
+CS_RGB = 1
+CS_GRAY = 2
+CS_CMYK = 3
# PDF Blend Modes
#
# Each value is the blend mode name as it is spelled in the PDF
# specification. Names are case-sensitive, so the spelling matters.
PDF_BM_Color = "Color"
PDF_BM_ColorBurn = "ColorBurn"
PDF_BM_ColorDodge = "ColorDodge"
PDF_BM_Darken = "Darken"
PDF_BM_Difference = "Difference"
PDF_BM_Exclusion = "Exclusion"
PDF_BM_HardLight = "HardLight"
PDF_BM_Hue = "Hue"
PDF_BM_Lighten = "Lighten"
PDF_BM_Luminosity = "Luminosity"
PDF_BM_Multiply = "Multiply"
PDF_BM_Normal = "Normal"
PDF_BM_Overlay = "Overlay"
PDF_BM_Saturation = "Saturation"
PDF_BM_Screen = "Screen"
# Was "Softlight", which is not a blend mode name defined by the PDF
# specification.
PDF_BM_SoftLight = "SoftLight"
+annot_skel = {
+"goto1": lambda a, b, c, d, e: f"<</A<</S/GoTo/D[{a} 0 R/XYZ {_format_g((b, c, d))}]>>/Rect[{e}]/BS<</W 0>>/Subtype/Link>>",
+"goto2": lambda a, b: f"<</A<</S/GoTo/D{a}>>/Rect[{b}]/BS<</W 0>>/Subtype/Link>>",
+"gotor1": lambda a, b, c, d, e, f, g: f"<</A<</S/GoToR/D[{a} /XYZ {_format_g((b, c, d))}]/F<</F({e})/UF({f})/Type/Filespec>>>>/Rect[{g}]/BS<</W 0>>/Subtype/Link>>",
+"gotor2": lambda a, b, c: f"<</A<</S/GoToR/D{a}/F({b})>>/Rect[{c}]/BS<</W 0>>/Subtype/Link>>",
+"launch": lambda a, b, c: f"<</A<</S/Launch/F<</F({a})/UF({b})/Type/Filespec>>>>/Rect[{c}]/BS<</W 0>>/Subtype/Link>>",
+"uri": lambda a, b: f"<</A<</S/URI/URI({a})>>/Rect[{b}]/BS<</W 0>>/Subtype/Link>>",
+"named": lambda a, b: f"<</A<</S/GoTo/D({a})/Type/Action>>/Rect[{b}]/BS<</W 0>>/Subtype/Link>>",
+}
class FileDataError(RuntimeError):
    """Raised for documents with file structure issues."""


# Within this module the name below hides the built-in FileNotFoundError;
# this class derives from RuntimeError.
class FileNotFoundError(RuntimeError):
    """Raised if file does not exist."""


class EmptyFileError(FileDataError):
    """Raised when creating documents from zero-length data."""
+# propagate exception class to C-level code
+#_set_FileDataError(FileDataError)
+csRGB = Colorspace(CS_RGB)
+csGRAY = Colorspace(CS_GRAY)
+csCMYK = Colorspace(CS_CMYK)
+# These don't appear to be visible in classic, but are used
+# internally.
+#
+dictkey_align = "align"
+dictkey_asc = "ascender"
+dictkey_bidi = "bidi"
+dictkey_bbox = "bbox"
+dictkey_blocks = "blocks"
+dictkey_bpc = "bpc"
+dictkey_c = "c"
+dictkey_chars = "chars"
+dictkey_color = "color"
+dictkey_colorspace = "colorspace"
+dictkey_content = "content"
+dictkey_creationDate = "creationDate"
+dictkey_cs_name = "cs-name"
+dictkey_da = "da"
+dictkey_dashes = "dashes"
+dictkey_descr = "description"
+dictkey_desc = "descender"
+dictkey_dir = "dir"
+dictkey_effect = "effect"
+dictkey_ext = "ext"
+dictkey_filename = "filename"
+dictkey_fill = "fill"
+dictkey_flags = "flags"
+dictkey_char_flags = "char_flags"
+dictkey_font = "font"
+dictkey_glyph = "glyph"
+dictkey_height = "height"
+dictkey_id = "id"
+dictkey_image = "image"
+dictkey_items = "items"
+dictkey_length = "length"
+dictkey_lines = "lines"
+dictkey_matrix = "transform"
+dictkey_modDate = "modDate"
+dictkey_name = "name"
+dictkey_number = "number"
+dictkey_origin = "origin"
+dictkey_rect = "rect"
+dictkey_size = "size"
+dictkey_smask = "smask"
+dictkey_spans = "spans"
+dictkey_stroke = "stroke"
+dictkey_style = "style"
+dictkey_subject = "subject"
+dictkey_text = "text"
+dictkey_title = "title"
+dictkey_type = "type"
+dictkey_ufilename = "ufilename"
+dictkey_width = "width"
+dictkey_wmode = "wmode"
+dictkey_xref = "xref"
+dictkey_xres = "xres"
+dictkey_yres = "yres"
+try:
+from pymupdf_fonts import fontdescriptors, fontbuffers
+fitz_fontdescriptors = fontdescriptors.copy()
+for k in fitz_fontdescriptors.keys():
+fitz_fontdescriptors[k]["loader"] = fontbuffers[k]
+del fontdescriptors, fontbuffers
+except ImportError:
+fitz_fontdescriptors = {}
+symbol_glyphs = (   # Glyph list for the built-in font 'Symbol'
+(183, 0.46),
+(183, 0.46),
+(183, 0.46),
+(183, 0.46),
+(183, 0.46),
+(183, 0.46),
+(183, 0.46),
+(183, 0.46),
+(183, 0.46),
+(183, 0.46),
+(183, 0.46),
+(183, 0.46),
+(183, 0.46),
+(183, 0.46),
+(183, 0.46),
+(183, 0.46),
+(183, 0.46),
+(183, 0.46),
+(183, 0.46),
+(183, 0.46),
+(183, 0.46),
+(183, 0.46),
+(183, 0.46),
+(183, 0.46),
+(183, 0.46),
+(183, 0.46),
+(183, 0.46),
+(183, 0.46),
+(183, 0.46),
+(183, 0.46),
+(183, 0.46),
+(183, 0.46),
+(32, 0.25),
+(33, 0.333),
+(34, 0.713),
+(35, 0.5),
+(36, 0.549),
+(37, 0.833),
+(38, 0.778),
+(39, 0.439),
+(40, 0.333),
+(41, 0.333),
+(42, 0.5),
+(43, 0.549),
+(44, 0.25),
+(45, 0.549),
+(46, 0.25),
+(47, 0.278),
+(48, 0.5),
+(49, 0.5),
+(50, 0.5),
+(51, 0.5),
+(52, 0.5),
+(53, 0.5),
+(54, 0.5),
+(55, 0.5),
+(56, 0.5),
+(57, 0.5),
+(58, 0.278),
+(59, 0.278),
+(60, 0.549),
+(61, 0.549),
+(62, 0.549),
+(63, 0.444),
+(64, 0.549),
+(65, 0.722),
+(66, 0.667),
+(67, 0.722),
+(68, 0.612),
+(69, 0.611),
+(70, 0.763),
+(71, 0.603),
+(72, 0.722),
+(73, 0.333),
+(74, 0.631),
+(75, 0.722),
+(76, 0.686),
+(77, 0.889),
+(78, 0.722),
+(79, 0.722),
+(80, 0.768),
+(81, 0.741),
+(82, 0.556),
+(83, 0.592),
+(84, 0.611),
+(85, 0.69),
+(86, 0.439),
+(87, 0.768),
+(88, 0.645),
+(89, 0.795),
+(90, 0.611),
+(91, 0.333),
+(92, 0.863),
+(93, 0.333),
+(94, 0.658),
+(95, 0.5),
+(96, 0.5),
+(97, 0.631),
+(98, 0.549),
+(99, 0.549),
+(100, 0.494),
+(101, 0.439),
+(102, 0.521),
+(103, 0.411),
+(104, 0.603),
+(105, 0.329),
+(106, 0.603),
+(107, 0.549),
+(108, 0.549),
+(109, 0.576),
+(110, 0.521),
+(111, 0.549),
+(112, 0.549),
+(113, 0.521),
+(114, 0.549),
+(115, 0.603),
+(116, 0.439),
+(117, 0.576),
+(118, 0.713),
+(119, 0.686),
+(120, 0.493),
+(121, 0.686),
+(122, 0.494),
+(123, 0.48),
+(124, 0.2),
+(125, 0.48),
+(126, 0.549),
+(183, 0.46),
+(183, 0.46),
+(183, 0.46),
+(183, 0.46),
+(183, 0.46),
+(183, 0.46),
+(183, 0.46),
+(183, 0.46),
+(183, 0.46),
+(183, 0.46),
+(183, 0.46),
+(183, 0.46),
+(183, 0.46),
+(183, 0.46),
+(183, 0.46),
+(183, 0.46),
+(183, 0.46),
+(183, 0.46),
+(183, 0.46),
+(183, 0.46),
+(183, 0.46),
+(183, 0.46),
+(183, 0.46),
+(183, 0.46),
+(183, 0.46),
+(183, 0.46),
+(183, 0.46),
+(183, 0.46),
+(183, 0.46),
+(183, 0.46),
+(183, 0.46),
+(183, 0.46),
+(183, 0.46),
+(160, 0.25),
+(161, 0.62),
+(162, 0.247),
+(163, 0.549),
+(164, 0.167),
+(165, 0.713),
+(166, 0.5),
+(167, 0.753),
+(168, 0.753),
+(169, 0.753),
+(170, 0.753),
+(171, 1.042),
+(172, 0.713),
+(173, 0.603),
+(174, 0.987),
+(175, 0.603),
+(176, 0.4),
+(177, 0.549),
+(178, 0.411),
+(179, 0.549),
+(180, 0.549),
+(181, 0.576),
+(182, 0.494),
+(183, 0.46),
+(184, 0.549),
+(185, 0.549),
+(186, 0.549),
+(187, 0.549),
+(188, 1),
+(189, 0.603),
+(190, 1),
+(191, 0.658),
+(192, 0.823),
+(193, 0.686),
+(194, 0.795),
+(195, 0.987),
+(196, 0.768),
+(197, 0.768),
+(198, 0.823),
+(199, 0.768),
+(200, 0.768),
+(201, 0.713),
+(202, 0.713),
+(203, 0.713),
+(204, 0.713),
+(205, 0.713),
+(206, 0.713),
+(207, 0.713),
+(208, 0.768),
+(209, 0.713),
+(210, 0.79),
+(211, 0.79),
+(212, 0.89),
+(213, 0.823),
+(214, 0.549),
+(215, 0.549),
+(216, 0.713),
+(217, 0.603),
+(218, 0.603),
+(219, 1.042),
+(220, 0.987),
+(221, 0.603),
+(222, 0.987),
+(223, 0.603),
+(224, 0.494),
+(225, 0.329),
+(226, 0.79),
+(227, 0.79),
+(228, 0.786),
+(229, 0.713),
+(230, 0.384),
+(231, 0.384),
+(232, 0.384),
+(233, 0.384),
+(234, 0.384),
+(235, 0.384),
+(236, 0.494),
+(237, 0.494),
+(238, 0.494),
+(239, 0.494),
+(183, 0.46),
+(241, 0.329),
+(242, 0.274),
+(243, 0.686),
+(244, 0.686),
+(245, 0.686),
+(246, 0.384),
+(247, 0.549),
+(248, 0.384),
+(249, 0.384),
+(250, 0.384),
+(251, 0.384),
+(252, 0.494),
+(253, 0.494),
+(254, 0.494),
+(183, 0.46),
+)
+zapf_glyphs = ( # Glyph list for the built-in font 'ZapfDingbats'
+(183, 0.788),
+(183, 0.788),
+(183, 0.788),
+(183, 0.788),
+(183, 0.788),
+(183, 0.788),
+(183, 0.788),
+(183, 0.788),
+(183, 0.788),
+(183, 0.788),
+(183, 0.788),
+(183, 0.788),
+(183, 0.788),
+(183, 0.788),
+(183, 0.788),
+(183, 0.788),
+(183, 0.788),
+(183, 0.788),
+(183, 0.788),
+(183, 0.788),
+(183, 0.788),
+(183, 0.788),
+(183, 0.788),
+(183, 0.788),
+(183, 0.788),
+(183, 0.788),
+(183, 0.788),
+(183, 0.788),
+(183, 0.788),
+(183, 0.788),
+(183, 0.788),
+(183, 0.788),
+(32, 0.278),
+(33, 0.974),
+(34, 0.961),
+(35, 0.974),
+(36, 0.98),
+(37, 0.719),
+(38, 0.789),
+(39, 0.79),
+(40, 0.791),
+(41, 0.69),
+(42, 0.96),
+(43, 0.939),
+(44, 0.549),
+(45, 0.855),
+(46, 0.911),
+(47, 0.933),
+(48, 0.911),
+(49, 0.945),
+(50, 0.974),
+(51, 0.755),
+(52, 0.846),
+(53, 0.762),
+(54, 0.761),
+(55, 0.571),
+(56, 0.677),
+(57, 0.763),
+(58, 0.76),
+(59, 0.759),
+(60, 0.754),
+(61, 0.494),
+(62, 0.552),
+(63, 0.537),
+(64, 0.577),
+(65, 0.692),
+(66, 0.786),
+(67, 0.788),
+(68, 0.788),
+(69, 0.79),
+(70, 0.793),
+(71, 0.794),
+(72, 0.816),
+(73, 0.823),
+(74, 0.789),
+(75, 0.841),
+(76, 0.823),
+(77, 0.833),
+(78, 0.816),
+(79, 0.831),
+(80, 0.923),
+(81, 0.744),
+(82, 0.723),
+(83, 0.749),
+(84, 0.79),
+(85, 0.792),
+(86, 0.695),
+(87, 0.776),
+(88, 0.768),
+(89, 0.792),
+(90, 0.759),
+(91, 0.707),
+(92, 0.708),
+(93, 0.682),
+(94, 0.701),
+(95, 0.826),
+(96, 0.815),
+(97, 0.789),
+(98, 0.789),
+(99, 0.707),
+(100, 0.687),
+(101, 0.696),
+(102, 0.689),
+(103, 0.786),
+(104, 0.787),
+(105, 0.713),
+(106, 0.791),
+(107, 0.785),
+(108, 0.791),
+(109, 0.873),
+(110, 0.761),
+(111, 0.762),
+(112, 0.762),
+(113, 0.759),
+(114, 0.759),
+(115, 0.892),
+(116, 0.892),
+(117, 0.788),
+(118, 0.784),
+(119, 0.438),
+(120, 0.138),
+(121, 0.277),
+(122, 0.415),
+(123, 0.392),
+(124, 0.392),
+(125, 0.668),
+(126, 0.668),
+(183, 0.788),
+(183, 0.788),
+(183, 0.788),
+(183, 0.788),
+(183, 0.788),
+(183, 0.788),
+(183, 0.788),
+(183, 0.788),
+(183, 0.788),
+(183, 0.788),
+(183, 0.788),
+(183, 0.788),
+(183, 0.788),
+(183, 0.788),
+(183, 0.788),
+(183, 0.788),
+(183, 0.788),
+(183, 0.788),
+(183, 0.788),
+(183, 0.788),
+(183, 0.788),
+(183, 0.788),
+(183, 0.788),
+(183, 0.788),
+(183, 0.788),
+(183, 0.788),
+(183, 0.788),
+(183, 0.788),
+(183, 0.788),
+(183, 0.788),
+(183, 0.788),
+(183, 0.788),
+(183, 0.788),
+(183, 0.788),
+(161, 0.732),
+(162, 0.544),
+(163, 0.544),
+(164, 0.91),
+(165, 0.667),
+(166, 0.76),
+(167, 0.76),
+(168, 0.776),
+(169, 0.595),
+(170, 0.694),
+(171, 0.626),
+(172, 0.788),
+(173, 0.788),
+(174, 0.788),
+(175, 0.788),
+(176, 0.788),
+(177, 0.788),
+(178, 0.788),
+(179, 0.788),
+(180, 0.788),
+(181, 0.788),
+(182, 0.788),
+(183, 0.788),
+(184, 0.788),
+(185, 0.788),
+(186, 0.788),
+(187, 0.788),
+(188, 0.788),
+(189, 0.788),
+(190, 0.788),
+(191, 0.788),
+(192, 0.788),
+(193, 0.788),
+(194, 0.788),
+(195, 0.788),
+(196, 0.788),
+(197, 0.788),
+(198, 0.788),
+(199, 0.788),
+(200, 0.788),
+(201, 0.788),
+(202, 0.788),
+(203, 0.788),
+(204, 0.788),
+(205, 0.788),
+(206, 0.788),
+(207, 0.788),
+(208, 0.788),
+(209, 0.788),
+(210, 0.788),
+(211, 0.788),
+(212, 0.894),
+(213, 0.838),
+(214, 1.016),
+(215, 0.458),
+(216, 0.748),
+(217, 0.924),
+(218, 0.748),
+(219, 0.918),
+(220, 0.927),
+(221, 0.928),
+(222, 0.928),
+(223, 0.834),
+(224, 0.873),
+(225, 0.828),
+(226, 0.924),
+(227, 0.924),
+(228, 0.917),
+(229, 0.93),
+(230, 0.931),
+(231, 0.463),
+(232, 0.883),
+(233, 0.836),
+(234, 0.836),
+(235, 0.867),
+(236, 0.867),
+(237, 0.696),
+(238, 0.696),
+(239, 0.874),
+(183, 0.788),
+(241, 0.874),
+(242, 0.76),
+(243, 0.946),
+(244, 0.771),
+(245, 0.865),
+(246, 0.771),
+(247, 0.888),
+(248, 0.967),
+(249, 0.888),
+(250, 0.831),
+(251, 0.873),
+(252, 0.927),
+(253, 0.97),
+(183, 0.788),
+(183, 0.788),
+)
+# Functions
+#
+def _read_samples( pixmap, offset, n):
+# fixme: need to be able to get a sample in one call, as a Python
+# bytes or similar.
+ret = []
+if not pixmap.samples():
+# mupdf.fz_samples_get() gives a segv if pixmap->samples is null.
+return ret
+for i in range( n):
+ret.append( mupdf.fz_samples_get( pixmap, offset + i))
+return bytes( ret)
+def _INRANGE(v, low, high):
+return low <= v and v <= high
def _remove_dest_range(pdf, numbers):
    '''
    Delete link annotations whose destination page number is in `numbers`.

    Pages whose own number is in `numbers` are not inspected. On every
    other page, each entry of /Annots with /Subtype /Link is examined: the
    destination is taken from /Dest, or from /D of a /GoTo action when an
    /A entry exists (links with any other action type are left alone).

    Args:
        pdf: the PDF document passed to the mupdf functions.
        numbers: container of page numbers, tested with `in`.
    '''
    for page_no in range(mupdf.pdf_count_pages(pdf)):
        if page_no in numbers:
            continue

        page_obj = mupdf.pdf_lookup_page_obj(pdf, page_no)
        annots = mupdf.pdf_dict_get(page_obj, PDF_NAME('Annots'))
        if not annots.m_internal:
            continue

        # Highest index first, so deleting entry j leaves the indices that
        # are still to be visited unchanged.
        for j in reversed(range(mupdf.pdf_array_len(annots))):
            link = mupdf.pdf_array_get(annots, j)
            subtype = mupdf.pdf_dict_get(link, PDF_NAME('Subtype'))
            if not mupdf.pdf_name_eq(subtype, PDF_NAME('Link')):
                continue

            action = mupdf.pdf_dict_get(link, PDF_NAME('A'))
            dest = mupdf.pdf_dict_get(link, PDF_NAME('Dest'))
            if action.m_internal:
                action_type = mupdf.pdf_dict_get(action, PDF_NAME('S'))
                if not mupdf.pdf_name_eq(action_type, PDF_NAME('GoTo')):
                    continue
                dest = mupdf.pdf_dict_get(action, PDF_NAME('D'))

            if mupdf.pdf_is_array(dest):
                target = mupdf.pdf_array_get(dest, 0)
                target_pno = mupdf.pdf_lookup_page_number(pdf, target)
            elif mupdf.pdf_is_string(dest):
                location, _, _ = mupdf.fz_resolve_link(pdf.super(), mupdf.pdf_to_text_string(dest))
                target_pno = location.page
            else:
                target_pno = -1

            # A negative number means the page number lookup did not work.
            if target_pno >= 0 and target_pno in numbers:
                mupdf.pdf_array_delete(annots, j)
def ASSERT_PDF(cond):
    '''
    Check that `cond` wraps an actual PDF object.

    `cond` must be a `mupdf.PdfPage` or `mupdf.PdfDocument` (asserted).
    Raises Exception(MSG_IS_NO_PDF) if its `m_internal` is falsy.
    '''
    pdf_types = (mupdf.PdfPage, mupdf.PdfDocument)
    assert isinstance(cond, pdf_types), f'{type(cond)=} {cond=}'
    if cond.m_internal:
        return
    raise Exception(MSG_IS_NO_PDF)
def EMPTY_IRECT():
    '''
    Return a new IRect whose first corner is (FZ_MAX_INF_RECT,
    FZ_MAX_INF_RECT) and whose second corner is (FZ_MIN_INF_RECT,
    FZ_MIN_INF_RECT).
    '''
    hi, lo = FZ_MAX_INF_RECT, FZ_MIN_INF_RECT
    return IRect(hi, hi, lo, lo)
def EMPTY_QUAD():
    '''
    Return the `.quad` of a fresh EMPTY_RECT().
    '''
    rect = EMPTY_RECT()
    return rect.quad
def EMPTY_RECT():
    '''
    Return a new Rect whose first corner is (FZ_MAX_INF_RECT,
    FZ_MAX_INF_RECT) and whose second corner is (FZ_MIN_INF_RECT,
    FZ_MIN_INF_RECT).
    '''
    hi, lo = FZ_MAX_INF_RECT, FZ_MIN_INF_RECT
    return Rect(hi, hi, lo, lo)
def ENSURE_OPERATION(pdf):
    '''
    Raise an Exception unless JM_have_operation(pdf) is true.
    '''
    if JM_have_operation(pdf):
        return
    raise Exception("No journalling operation started")
def INFINITE_IRECT():
    '''
    Return a new IRect spanning (FZ_MIN_INF_RECT, FZ_MIN_INF_RECT) to
    (FZ_MAX_INF_RECT, FZ_MAX_INF_RECT).
    '''
    lo, hi = FZ_MIN_INF_RECT, FZ_MAX_INF_RECT
    return IRect(lo, lo, hi, hi)
def INFINITE_QUAD():
    '''
    Return the `.quad` of a fresh INFINITE_RECT().
    '''
    rect = INFINITE_RECT()
    return rect.quad
def INFINITE_RECT():
    '''
    Return a new Rect spanning (FZ_MIN_INF_RECT, FZ_MIN_INF_RECT) to
    (FZ_MAX_INF_RECT, FZ_MAX_INF_RECT).
    '''
    lo, hi = FZ_MIN_INF_RECT, FZ_MAX_INF_RECT
    return Rect(lo, lo, hi, hi)
def JM_BinFromBuffer(buffer_):
    '''
    Turn fz_buffer into a Python bytes object.

    `buffer_` must be a `mupdf.FzBuffer` (asserted); the result is whatever
    `mupdf.fz_buffer_extract_copy()` returns for it.
    '''
    assert isinstance(buffer_, mupdf.FzBuffer)
    return mupdf.fz_buffer_extract_copy(buffer_)
def JM_EscapeStrFromStr(c):
    '''
    Return a `str` with one character per byte of the UTF-8 form of `c`.

    Args:
        c: a `str` or None.
    Returns:
        '' if `c` is None; otherwise a string in which each byte of
        `c.encode('utf8', 'surrogateescape')` appears as the character
        with that code point.
    '''
    # `c` is typically from SWIG which will have converted a `const char*` from
    # C into a Python `str` using `PyUnicode_DecodeUTF8(carray, static_cast<
    # Py_ssize_t >(size), "surrogateescape")`.  This gives us a Python `str`
    # in which each invalid byte XY of the original `const char*` is
    # represented by the lone surrogate U+DCXY.
    #
    # This is actually a reasonable way of representing arbitrary
    # strings from C, but we want to mimic what PyMuPDF does. It uses
    # `PyUnicode_DecodeRawUnicodeEscape(c, (Py_ssize_t) strlen(c), "replace")`
    # which gives a string containing actual unicode characters for any invalid
    # bytes.
    #
    # We mimic this by converting the `str` to `bytes` with 'surrogateescape'
    # (which turns those surrogates back into the original bytes) and then
    # mapping every byte to the character with the same code point. Decoding
    # as latin-1 is exactly that mapping, and does it in one pass instead of
    # growing a string one `chr()` at a time.
    #
    if c is None:
        return ''
    assert isinstance(c, str), f'{type(c)=}'
    return c.encode('utf8', 'surrogateescape').decode('latin-1')
def JM_BufferFromBytes(stream):
    '''
    Make fz_buffer from a bytes or bytearray object, or from an object that
    has a `.getvalue()` method (for example io.BytesIO).

    If `.getvalue()` returns a `str`, it is converted to binary by encoding
    as utf8. An object of any other kind yields an empty `mupdf.FzBuffer()`.

    Raises:
        Exception: if `.getvalue()` returns something that is neither
        `str`, `bytes` nor `bytearray`.
    '''
    binary_types = (bytes, bytearray)
    if isinstance(stream, binary_types):
        return mupdf.fz_new_buffer_from_copied_data(stream)
    if not hasattr(stream, 'getvalue'):
        return mupdf.FzBuffer()

    data = stream.getvalue()
    if isinstance(data, str):
        data = data.encode('utf-8')
    if not isinstance(data, binary_types):
        raise Exception(f'.getvalue() returned unexpected type: {type(data)}')
    return mupdf.fz_new_buffer_from_copied_data(data)
def JM_FLOAT_ITEM(obj, idx):
    '''
    Return `float(obj[idx])`, or None if PySequence_Check(obj) is false.
    '''
    return float(obj[idx]) if PySequence_Check(obj) else None
def JM_INT_ITEM(obj, idx):
    '''
    Fetch a numeric item from a sequence.

    Returns:
        (0, obj[idx]) if `idx < len(obj)` and the item is an int or float,
        otherwise (1, None).
    '''
    failure = (1, None)
    if not idx < len(obj):
        return failure
    item = obj[idx]
    if not isinstance(item, (int, float)):
        return failure
    return 0, item
def JM_pixmap_from_page(doc, page, ctm, cs, alpha, annots, clip):
    '''
    Pixmap creation directly using a short-lived displaylist, so we can support
    separations.

    Args:
        doc: document, queried for its output intent.
        page: page to render.
        ctm: matrix-like, converted with JM_matrix_from_py().
        cs: requested colorspace.
        alpha: passed to fz_new_pixmap_with_bbox(); also selects how the
            new pixmap is cleared.
        annots: if true the whole page is run, otherwise only its contents.
        clip: rect-like, converted with JM_rect_from_py().
    Returns:
        The rendered pixmap.
    '''
    SPOTS_NONE = 0
    SPOTS_OVERPRINT_SIM = 1
    SPOTS_FULL = 2
    FZ_ENABLE_SPOT_RENDERING = True # fixme: this is a build-time setting in MuPDF's config.h.
    spots = SPOTS_OVERPRINT_SIM if FZ_ENABLE_SPOT_RENDERING else SPOTS_NONE

    seps = None
    colorspace = cs

    # Area to render: page bounds, restricted by the clip, in device space.
    matrix = JM_matrix_from_py(ctm)
    area = mupdf.fz_bound_page(page)
    clip_rect = JM_rect_from_py(clip)
    area = mupdf.fz_intersect_rect(area, clip_rect) # no-op if clip is not given
    area = mupdf.fz_transform_rect(area, matrix)
    bbox = mupdf.fz_round_rect(area)

    # Output intent of the document; if present and compatible, use it
    # instead of the parameter.
    oi = mupdf.fz_document_output_intent(doc)
    if oi.m_internal and mupdf.fz_colorspace_n(oi) == mupdf.fz_colorspace_n(cs):
        colorspace = mupdf.fz_keep_colorspace(oi)

    # check if spots rendering is available and if so use separations
    if spots != SPOTS_NONE:
        seps = mupdf.fz_page_separations(page)
        if seps.m_internal:
            want_spots = spots == SPOTS_FULL
            for i in range(mupdf.fz_count_separations(seps)):
                if want_spots:
                    behavior = mupdf.FZ_SEPARATION_SPOT
                else:
                    behavior = mupdf.FZ_SEPARATION_COMPOSITE
                mupdf.fz_set_separation_behavior(seps, i, behavior)
        elif mupdf.fz_page_uses_overprint(page):
            # This page uses overprint, so we need an empty
            # sep object to force the overprint simulation on.
            seps = mupdf.fz_new_separations(0)
        elif oi.m_internal and mupdf.fz_colorspace_n(oi) != mupdf.fz_colorspace_n(colorspace):
            # We have an output intent, and it's incompatible
            # with the colorspace our device needs. Force the
            # overprint simulation on, because this ensures that
            # we 'simulate' the output intent too.
            seps = mupdf.fz_new_separations(0)

    pix = mupdf.fz_new_pixmap_with_bbox(colorspace, bbox, seps, alpha)
    if alpha:
        mupdf.fz_clear_pixmap(pix)
    else:
        mupdf.fz_clear_pixmap_with_value(pix, 0xFF)

    dev = mupdf.fz_new_draw_device(matrix, pix)
    run = mupdf.fz_run_page if annots else mupdf.fz_run_page_contents
    run(page, dev, mupdf.FzMatrix(), mupdf.FzCookie())
    mupdf.fz_close_device(dev)
    return pix
def JM_StrAsChar(x):
    '''
    Return `x` unchanged.
    '''
    # fixme: should encode (x.encode('utf8')), but swig doesn't pass bytes
    # to C as const char*.
    return x
def JM_TUPLE(o: typing.Sequence) -> tuple:
    '''
    Return the items of `o` as a tuple, each rounded to 5 decimals; items
    whose absolute value is below 1e-4 become 0.
    '''
    return tuple(round(x, 5) if abs(x) >= 1e-4 else 0 for x in o)
def JM_TUPLE3(o: typing.Sequence) -> tuple:
    '''
    Return the items of `o` as a tuple, each rounded to 3 decimals; items
    whose absolute value is below 1e-3 become 0.
    '''
    return tuple(round(x, 3) if abs(x) >= 1e-3 else 0 for x in o)
def JM_UnicodeFromStr(s):
    '''
    Return `s` as a `str`.

    None gives ''; `bytes` are decoded as utf8; a `str` is returned as is.
    Any other type fails the assertion.
    '''
    if s is None:
        return ''
    text = s.decode('utf8') if isinstance(s, bytes) else s
    # When this fails `text` is `s` itself, so the message reports `s`.
    assert isinstance(text, str), f'{type(s)=} {s=}'
    return text
def JM_add_annot_id(annot, stem):
    '''
    Add a unique /NM key to an annotation or widget.
    Append a number to 'stem' such that the result is a unique name.

    The candidate names have the form '<JM_annot_id_stem>-<stem><number>',
    with the number counting up from 0 until a name is found that is not
    in JM_get_annot_id_list() of the annotation's page.
    '''
    assert isinstance(annot, mupdf.PdfAnnot)
    page = _pdf_annot_page(annot)
    annot_obj = mupdf.pdf_annot_obj(annot)
    taken = JM_get_annot_id_list(page)

    def candidate(number):
        return f'{JM_annot_id_stem}-{stem}{number}'

    number = 0
    stem_id = candidate(number)
    while stem_id in taken:
        number += 1
        stem_id = candidate(number)

    response = JM_StrAsChar(stem_id)
    name = mupdf.pdf_new_string(response, len(response))
    mupdf.pdf_dict_puts(annot_obj, "NM", name)
    page.doc().m_internal.resynth_required = 0
def JM_add_oc_object(pdf, ref, xref):
    '''
    Add OC object reference to a dictionary.

    Stores an indirect reference to object `xref` under key /OC of `ref`.
    The referenced object must be a dictionary whose /Type is /OCG or
    /OCMD; otherwise RAISEPY(MSG_BAD_OC_REF, PyExc_ValueError) is called.
    '''
    indobj = mupdf.pdf_new_indirect(pdf, xref, 0)
    if not mupdf.pdf_is_dict(indobj):
        RAISEPY(MSG_BAD_OC_REF, PyExc_ValueError)

    type_ = mupdf.pdf_dict_get(indobj, PDF_NAME('Type'))
    is_oc_type = any(
        mupdf.pdf_objcmp(type_, PDF_NAME(type_name)) == 0
        for type_name in ('OCG', 'OCMD')
    )
    if is_oc_type:
        mupdf.pdf_dict_put(ref, PDF_NAME('OC'), indobj)
    else:
        RAISEPY(MSG_BAD_OC_REF, PyExc_ValueError)
def JM_annot_border(annot_obj):
    '''
    Read the border properties of an annotation object.

    Values are taken from /Border first and then from /BS, so /BS wins
    for the width; dash values found in either place are collected in
    that order. The cloud intensity comes from /BE /I.

    Returns:
        dict with keys dictkey_width (-1 if not found), dictkey_dashes (a
        tuple), dictkey_style (None if not found or empty) and 'clouds'
        (-1 if not found).
    '''
    def int_items(array):
        # Integer value of every entry of a PDF array.
        return [
            mupdf.pdf_to_int(mupdf.pdf_array_get(array, i))
            for i in range(mupdf.pdf_array_len(array))
        ]

    dashes = []
    style = None
    width = -1
    clouds = -1

    border = mupdf.pdf_dict_get(annot_obj, PDF_NAME('Border'))
    if mupdf.pdf_is_array(border):
        width = mupdf.pdf_to_real(mupdf.pdf_array_get(border, 2))
        if mupdf.pdf_array_len(border) == 4:
            dashes.extend(int_items(mupdf.pdf_array_get(border, 3)))

    bs_o = mupdf.pdf_dict_get(annot_obj, PDF_NAME('BS'))
    if bs_o.m_internal:
        width = mupdf.pdf_to_real(mupdf.pdf_dict_get(bs_o, PDF_NAME('W')))
        style = mupdf.pdf_to_name(mupdf.pdf_dict_get(bs_o, PDF_NAME('S')))
        if style == '':
            style = None
        bs_dashes = mupdf.pdf_dict_get(bs_o, PDF_NAME('D'))
        if bs_dashes.m_internal:
            dashes.extend(int_items(bs_dashes))

    effect = mupdf.pdf_dict_get(annot_obj, PDF_NAME('BE'))
    if effect.m_internal:
        clouds = mupdf.pdf_to_int(mupdf.pdf_dict_get(effect, PDF_NAME('I')))

    return {
        dictkey_width: width,
        dictkey_dashes: tuple(dashes),
        dictkey_style: style,
        'clouds': clouds,
    }
def JM_annot_colors(annot_obj):
    '''
    Read the colors of an annotation object.

    Returns:
        dict with key dictkey_stroke (list of the real values in /C) and
        key dictkey_fill (list of the real values in /IC). A list is empty
        if the respective entry is not an array.
    '''
    def components(array):
        # Real value of every entry, or nothing if this is not an array.
        if not mupdf.pdf_is_array(array):
            return []
        return [
            mupdf.pdf_to_real(mupdf.pdf_array_get(array, i))
            for i in range(mupdf.pdf_array_len(array))
        ]

    stroke = components(mupdf.pdf_dict_get(annot_obj, mupdf.PDF_ENUM_NAME_C))
    fill = components(mupdf.pdf_dict_gets(annot_obj, "IC"))
    return {dictkey_stroke: stroke, dictkey_fill: fill}
def JM_annot_set_border(border, doc, annot_obj):
    '''
    Replace the border properties of an annotation object.

    The entries /BS, /BE and /Border are deleted and /BS (plus /BE when a
    positive cloud value results) is written again. Any property that is
    not given keeps the value reported by JM_annot_border() beforehand.

    Args:
        border: dict; the keys dictkey_width, dictkey_dashes,
            dictkey_style and 'clouds' are read. A missing or None value
            means "keep current"; so does a negative width or clouds value.
        doc: PDF document, used to create the dashes array.
        annot_obj: the annotation's PDF object.
    '''
    assert isinstance(border, dict)
    dashlen = 0
    nwidth = border.get(dictkey_width)      # new width
    ndashes = border.get(dictkey_dashes)    # new dashes
    nstyle = border.get(dictkey_style)      # new style
    nclouds = border.get('clouds', -1)      # new clouds value

    # get old border properties (must happen before they are deleted)
    oborder = JM_annot_border(annot_obj)

    # delete border-related entries
    mupdf.pdf_dict_del(annot_obj, PDF_NAME('BS'))
    mupdf.pdf_dict_del(annot_obj, PDF_NAME('BE'))
    mupdf.pdf_dict_del(annot_obj, PDF_NAME('Border'))

    # populate border items: keep old values for any omitted new ones.
    # None is checked first because `None < 0` raises TypeError.
    if nwidth is None or nwidth < 0:
        nwidth = oborder.get(dictkey_width)     # no new width: keep current
    if ndashes is None:
        ndashes = oborder.get(dictkey_dashes)   # no new dashes: keep old
    if nstyle is None:
        nstyle = oborder.get(dictkey_style)     # no new style: keep old
    if nclouds is None or nclouds < 0:
        nclouds = oborder.get("clouds", -1)     # no new clouds: keep old

    if isinstance(ndashes, tuple) and len(ndashes) > 0:
        dashlen = len(ndashes)
        darr = mupdf.pdf_new_array(doc, dashlen)
        for d in ndashes:
            mupdf.pdf_array_push_int(darr, d)
        mupdf.pdf_dict_putl(annot_obj, darr, PDF_NAME('BS'), PDF_NAME('D'))

    mupdf.pdf_dict_putl(
            annot_obj,
            mupdf.pdf_new_real(nwidth),
            PDF_NAME('BS'),
            PDF_NAME('W'),
            )

    # With dashes present the style is always /D, whatever was requested.
    if dashlen == 0:
        obj = JM_get_border_style(nstyle)
    else:
        obj = PDF_NAME('D')
    mupdf.pdf_dict_putl(annot_obj, obj, PDF_NAME('BS'), PDF_NAME('S'))

    if nclouds > 0:
        mupdf.pdf_dict_put_dict(annot_obj, PDF_NAME('BE'), 2)
        obj = mupdf.pdf_dict_get(annot_obj, PDF_NAME('BE'))
        mupdf.pdf_dict_put(obj, PDF_NAME('S'), PDF_NAME('C'))
        mupdf.pdf_dict_put_int(obj, PDF_NAME('I'), nclouds)
def make_escape(ch):
    '''
    Return the text form of code point `ch` (an int) for escaped output.

    * Backslash (92) becomes the six characters \\u005c.
    * Line feed (10) and code points 32..127 are returned as the character.
    * Surrogates 0xd800..0xdfff (orphaned) become the escape \\ufffd.
    * Everything else becomes \\uXXXX, or \\UXXXXXXXX above 0xffff.
    '''
    if ch == 92:
        return "\\u005c"
    if ch == 10 or 32 <= ch <= 127:
        return chr(ch)
    if 0xd800 <= ch <= 0xdfff:  # orphaned surrogate
        return "\\ufffd"
    if ch > 0xffff:
        return "\\U%08x" % ch
    return "\\u%04x" % ch
def JM_append_rune(buff, ch):
    '''
    Append code point `ch` to fz_buffer `buff`, in the text form produced
    by make_escape().
    '''
    escaped = make_escape(ch)
    mupdf.fz_append_string(buff, escaped)
def JM_append_word(lines, buff, wbbox, block_n, line_n, word_n):
    '''
    Append one word item to the word list `lines`.

    The item is the tuple
    (x0, y0, x1, y1, text, block_n, line_n, word_n), where the coordinates
    come from `wbbox` and the text is decoded from `buff`.

    Returns:
        (word_n + 1, a new rect built from FzRect.Fixed_EMPTY), i.e. the
        next word counter and a fresh word bbox.
    '''
    text = JM_EscapeStrFromBuffer(buff)
    lines.append(
            (wbbox.x0, wbbox.y0, wbbox.x1, wbbox.y1, text, block_n, line_n, word_n)
            )
    next_word_n = word_n + 1
    fresh_bbox = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY)
    return next_word_n, fresh_bbox
def JM_add_layer_config( pdf, name, creator, ON):
    '''
    Add an optional content configuration to the PDF catalog.

    A new dictionary with /Name, optional /Creator, /BaseState /OFF and an
    /ON array is appended to OCProperties/Configs (the array is created if
    missing).

    Args:
        pdf: the PDF document.
        name: text for the /Name entry.
        creator: text for the /Creator entry, or None to omit it.
        ON: sequence of xref numbers; those that occur in
            OCProperties/OCGs are put into the /ON array. Items for which
            JM_INT_ITEM() reports 1 are skipped.
    '''
    ocp = JM_ensure_ocproperties( pdf)
    configs = mupdf.pdf_dict_get( ocp, PDF_NAME('Configs'))
    if not mupdf.pdf_is_array( configs):
        configs = mupdf.pdf_dict_put_array( ocp, PDF_NAME('Configs'), 1)

    config = mupdf.pdf_new_dict( pdf, 5)
    mupdf.pdf_dict_put_text_string( config, PDF_NAME('Name'), name)
    if creator is not None:
        mupdf.pdf_dict_put_text_string( config, PDF_NAME('Creator'), creator)
    mupdf.pdf_dict_put( config, PDF_NAME('BaseState'), PDF_NAME('OFF'))
    on_array = mupdf.pdf_dict_put_array( config, PDF_NAME('ON'), 5)

    if ON:
        ocgs = mupdf.pdf_dict_get( ocp, PDF_NAME('OCGs'))
        for idx in range( len( ON)):
            failed, xref = JM_INT_ITEM( ON, idx)
            if failed == 1:
                continue
            ref = mupdf.pdf_new_indirect( pdf, xref, 0)
            if mupdf.pdf_array_contains( ocgs, ref):
                mupdf.pdf_array_push( on_array, ref)

    mupdf.pdf_array_push( configs, config)
def JM_char_bbox(line, ch):
    '''
    Return the rectangle of a character's quad.

    If the line's `wmode` is set and the rectangle is lower than the
    character's size, its top (y0) is moved so that the height equals the
    size.
    '''
    rect = mupdf.fz_rect_from_quad(JM_char_quad(line, ch))
    if line.m_internal.wmode:
        if rect.y1 < rect.y0 + ch.m_internal.size:
            rect.y0 = rect.y1 - ch.m_internal.size
    return rect
def JM_char_font_flags(font, line, ch):
    '''
    Return the sum of font property flags for a character.

    The result of detect_super_script() is included only if both `line` and
    `ch` are given; then one TEXT_FONT_* value is added per font property
    that MuPDF reports for `font` (italic, serifed, monospaced, bold).
    '''
    flags = 0
    if line and ch:
        flags += detect_super_script(line, ch)
    properties = (
            (mupdf.fz_font_is_italic, TEXT_FONT_ITALIC),
            (mupdf.fz_font_is_serif, TEXT_FONT_SERIFED),
            (mupdf.fz_font_is_monospaced, TEXT_FONT_MONOSPACED),
            (mupdf.fz_font_is_bold, TEXT_FONT_BOLD),
            )
    for probe, bit in properties:
        flags += probe(font) * bit
    return flags
def JM_char_quad(line, ch):
    '''
    Return the quad of character `ch` on text line `line`, re-computed if
    the font's ascender / descender values make no sense.

    With `g_use_extra` set, the work is delegated to extra.JM_char_quad().
    The stored quad is returned unchanged if quad corrections are switched
    off, for lines with `wmode` set, or if ascender - descender is at least
    1 and small glyph heights are not requested.
    '''
    if g_use_extra:
        # This reduces time taken to extract text from PyMuPDF.pdf from 20s to
        # 15s.
        return mupdf.FzQuad(extra.JM_char_quad( line.m_internal, ch.m_internal))

    assert isinstance(line, mupdf.FzStextLine)
    assert isinstance(ch, mupdf.FzStextChar)
    # No special handling requested, or vertical write mode (never touched).
    if _globals.skip_quad_corrections or line.m_internal.wmode:
        return ch.quad

    font = mupdf.FzFont(mupdf.ll_fz_keep_font(ch.m_internal.font))
    asc = JM_font_ascender(font)
    dsc = JM_font_descender(font)
    fsize = ch.m_internal.size
    asc_dsc = asc - dsc + FLT_EPSILON
    if asc_dsc >= 1 and _globals.small_glyph_heights == 0:   # no problem
        return mupdf.FzQuad(ch.m_internal.quad)

    # Font bbox width; only a start value, it is replaced by the glyph
    # advance before being used below.
    bbox = mupdf.fz_font_bbox(font)
    fwidth = bbox.x1 - bbox.x0

    if asc < 1e-3:  # probably Tesseract glyphless font
        dsc, asc, asc_dsc = -0.1, 0.9, 1.0

    if _globals.small_glyph_heights or asc_dsc < 1:
        dsc = dsc / asc_dsc
        asc = asc / asc_dsc
    asc_dsc = asc - dsc
    asc = asc * fsize / asc_dsc
    dsc = dsc * fsize / asc_dsc

    # Re-compute quad with the adjusted ascender / descender values:
    # Move ch->origin to (0,0) and de-rotate quad, then adjust the corners,
    # re-rotate and move back to ch->origin location.
    cos = line.m_internal.dir.x
    sin = line.m_internal.dir.y
    derotate = mupdf.fz_make_matrix(cos, -sin, sin, cos, 0, 0)
    rerotate = mupdf.fz_make_matrix(cos, sin, -sin, cos, 0, 0)
    if cos == -1:   # left-right flip
        derotate.d = 1
        rerotate.d = 1
    to_zero = mupdf.fz_make_matrix(1, 0, 0, 1, -ch.m_internal.origin.x, -ch.m_internal.origin.y)
    to_origin = mupdf.fz_make_matrix(1, 0, 0, 1, ch.m_internal.origin.x, ch.m_internal.origin.y)

    quad = mupdf.fz_transform_quad(mupdf.FzQuad(ch.m_internal.quad), to_zero)   # move origin to (0,0)
    quad = mupdf.fz_transform_quad(quad, derotate)  # de-rotate corners

    # adjust vertical coordinates
    if cos == 1 and quad.ul.y > 0:    # up-down flip
        upper, lower = asc, dsc
    else:
        upper, lower = -asc, -dsc
    quad.ul.y = upper
    quad.ur.y = upper
    quad.ll.y = lower
    quad.lr.y = lower

    # adjust horizontal coordinates that are too crazy:
    # (1) left x must be >= 0
    # (2) if bbox width is 0, lookup char advance in font.
    if quad.ll.x < 0:
        quad.ll.x = 0
        quad.ul.x = 0
    cwidth = quad.lr.x - quad.ll.x
    if cwidth < FLT_EPSILON:
        glyph = mupdf.fz_encode_character( font, ch.m_internal.c)
        if glyph:
            fwidth = mupdf.fz_advance_glyph( font, glyph, line.m_internal.wmode)
            quad.lr.x = quad.ll.x + fwidth * fsize
            quad.ur.x = quad.lr.x

    quad = mupdf.fz_transform_quad(quad, rerotate)  # rotate back
    quad = mupdf.fz_transform_quad(quad, to_origin) # translate back
    return quad
def JM_choice_options(annot):
    '''
    Return the list of choices of a list box or combo box.

    Each item of the widget's /Opt array gives one list entry: a pair of
    strings if the item is an array of length 2, else a single string.
    Returns None if MuPDF reports no options (wrong widget type).
    '''
    annot_obj = mupdf.pdf_annot_obj( annot.this)
    count = len( mupdf.pdf_choice_widget_options2( annot, 0))
    if count == 0:
        return  # wrong widget type

    optarr = mupdf.pdf_dict_get( annot_obj, PDF_NAME('Opt'))
    choices = []
    for idx in range( count):
        item = mupdf.pdf_array_get( optarr, idx)
        if mupdf.pdf_array_len( item) == 2:
            choices.append( (
                    mupdf.pdf_to_text_string( mupdf.pdf_array_get( item, 0)),
                    mupdf.pdf_to_text_string( mupdf.pdf_array_get( item, 1)),
                    ))
        else:
            choices.append( mupdf.pdf_to_text_string( item))
    return choices
def JM_clear_pixmap_rect_with_value(dest, value, b):
    '''
    Clear a pixmap rectangle - my version also supports non-alpha pixmaps

    Args:
        dest: the pixmap to modify.
        value: sample value to store.
        b: the rectangle; it is clipped to the pixmap's bbox.
    Returns:
        0 if the clipped rectangle is empty (nothing is written), else 1.

    For a colorspace with 4 components the first three samples of each
    pixel are set to 0 and the fourth to 255 - value; otherwise all color
    samples are set to `value`. An alpha sample, if present, is set to 255.
    '''
    b = mupdf.fz_intersect_irect(b, mupdf.fz_pixmap_bbox(dest))
    width = b.x1 - b.x0
    height = b.y1 - b.y0
    if width <= 0 or height <= 0:
        return 0

    stride = dest.stride()
    n = dest.n()
    has_alpha = dest.alpha()
    row_start = stride * (b.y0 - dest.y()) + n * (b.x0 - dest.x())

    # Build the sample values of one pixel once.
    # CMYK needs special handling (and potentially any other subtractive colorspaces)
    if mupdf.fz_colorspace_n(dest.colorspace()) == 4:
        pixel = (0, 0, 0, 255 - value)
        if has_alpha:
            pixel += (255,)
    else:
        # n - 1 samples of `value`, then alpha (255) or one more `value`.
        pixel = (value,) * (n - 1) + ((255,) if has_alpha else (value,))

    # Exactly `height` rows: the former "check for 0, then decrement" loop
    # tail ran once too often and also wrote the row below the rectangle.
    for _ in range(height):
        s = row_start
        for _ in range(width):
            for sample in pixel:
                mupdf.fz_samples_set(dest, s, sample)
                s += 1
        row_start += stride
    return 1
def JM_color_FromSequence(color):
    '''
    Normalize a color specification.

    Args:
        color: a single int / float, or a list / tuple with 0, 1, 3 or 4
            components.
    Returns:
        (n, components): the number of components and the components with
        every value outside 0..1 replaced by 1. A list is returned for list
        or single-number input, a tuple for tuple input; the argument itself
        is never modified. Invalid input gives (-1, []).
    '''
    if isinstance(color, (int, float)):    # maybe just a single float
        color = [color]

    if not isinstance(color, (list, tuple)):
        return -1, []

    if len(color) not in (0, 1, 3, 4):
        return -1, []

    # Build a new sequence instead of assigning into a copy: tuples do not
    # support item assignment, which made out-of-range tuple input fail.
    clamped = [1 if (c < 0 or c > 1) else c for c in color]
    if isinstance(color, tuple):
        clamped = tuple(clamped)
    return len(clamped), clamped
def JM_color_count( pm, clip):
    '''
    Count how often each pixel value occurs in a pixmap area.

    Args:
        pm: the pixmap.
        clip: Python rect-like; the area is its rounded intersection with
            the pixmap's bbox.
    Returns:
        With `g_use_extra` set, whatever extra.ll_JM_color_count() returns.
        Otherwise a dict mapping each pixel (as returned by _read_samples()
        for `pm.n()` samples) to its number of occurrences; an empty dict
        if the area is empty.
    '''
    if g_use_extra:
        return extra.ll_JM_color_count(pm.m_internal, clip)

    rc = dict()
    irect = mupdf.fz_pixmap_bbox( pm)
    irect = mupdf.fz_intersect_irect(irect, mupdf.fz_round_rect(JM_rect_from_py(clip)))
    # Leave before touching any sample: for an empty area the start offset
    # computed below need not address a pixel of the pixmap.
    if mupdf.fz_is_empty_irect(irect):
        return rc

    stride = pm.stride()
    width = irect.x1 - irect.x0
    height = irect.y1 - irect.y0
    n = pm.n()
    substride = width * n
    s = stride * (irect.y0 - pm.y()) + (irect.x0 - pm.x()) * n

    # Run-length counting: `cnt` is the length of the current run of
    # `oldpix`; it is added to the dict whenever the pixel value changes.
    oldpix = _read_samples( pm, s, n)
    cnt = 0
    for _ in range( height):
        for j in range( 0, substride, n):
            newpix = _read_samples( pm, s + j, n)
            if newpix == oldpix:
                cnt += 1
                continue
            rc[ oldpix] = rc.get( oldpix, 0) + cnt
            cnt = 1
            oldpix = newpix
        s += stride
    # account for the final run
    rc[ oldpix] = rc.get( oldpix, 0) + cnt
    return rc
def JM_compress_buffer(inbuffer):
    '''
    Return a new buffer with the deflated content of `inbuffer`
    (FZ_DEFLATE_BEST), or None if deflating gave no data.
    '''
    deflated, size = mupdf.fz_new_deflated_data_from_buffer(
            inbuffer,
            mupdf.FZ_DEFLATE_BEST,
            )
    if not deflated or size == 0:
        return None
    result = mupdf.FzBuffer(mupdf.fz_new_buffer_from_data(deflated, size))
    mupdf.fz_resize_buffer(result, size)
    return result
def JM_copy_rectangle(page, area):
    '''
    Return the text of all characters whose bbox overlaps `area`.

    Only text blocks of `page` are looked at. Characters are converted
    with make_escape(). A line feed is put between the output of two lines
    that both contributed characters.
    '''
    pieces = []
    pending_newline = False
    for block in page:
        if block.m_internal.type != mupdf.FZ_STEXT_BLOCK_TEXT:
            continue
        for line in block:
            line_had_text = False
            for ch in line:
                if not JM_rects_overlap(area, JM_char_bbox(line, ch)):
                    continue
                line_had_text = True
                if pending_newline:
                    pieces.append("\n")
                    pending_newline = False
                pieces.append(make_escape(ch.m_internal.c))
            if line_had_text:
                pending_newline = True
    return "".join(pieces)
def JM_convert_to_pdf(doc, fp, tp, rotate):
    '''
    Convert any MuPDF document to a PDF.

    Args:
        doc: the source document.
        fp, tp: first and last page number to convert; if fp > tp the
            pages are taken in descending order.
        rotate: page rotation, normalized with JM_norm_rotation().
    Returns:
        bytes object containing the PDF, created via 'write' function.
    '''
    pdfout = mupdf.PdfDocument()
    if fp > tp:
        step, low, high = -1, tp, fp    # count backwards
    else:
        step, low, high = 1, fp, tp
    rot = JM_norm_rotation(rotate)

    # interpret & write document pages as PDF pages
    pno = fp
    while _INRANGE(pno, low, high):
        page = mupdf.fz_load_page(doc, pno)
        mediabox = mupdf.fz_bound_page(page)
        dev, resources, contents = mupdf.pdf_page_write(pdfout, mediabox)
        mupdf.fz_run_page(page, dev, mupdf.FzMatrix(), mupdf.FzCookie())
        mupdf.fz_close_device(dev)
        dev = None  # drop our reference to the device before adding the page
        page_obj = mupdf.pdf_add_page(pdfout, mediabox, rot, resources, contents)
        mupdf.pdf_insert_page(pdfout, -1, page_obj)
        pno += step

    # PDF created - now write it to Python bytes
    # prepare write options structure
    opts = mupdf.PdfWriteOptions()
    opts.do_garbage = 4
    opts.do_compress = 1
    opts.do_compress_images = 1
    opts.do_compress_fonts = 1
    opts.do_sanitize = 1
    opts.do_incremental = 0
    opts.do_ascii = 0
    opts.do_decompress = 0
    opts.do_linear = 0
    opts.do_clean = 1
    opts.do_pretty = 0

    res = mupdf.fz_new_buffer(8192)
    out = mupdf.FzOutput(res)
    mupdf.pdf_write_document(pdfout, out, opts)
    out.fz_close_output()
    pdf_bytes = mupdf.fz_buffer_extract_copy(res)
    assert isinstance(pdf_bytes, bytes)
    return pdf_bytes
# Copied from MuPDF v1.14
# Create widget
def JM_create_widget(doc, page, type, fieldname):
    '''
    Create a new widget annotation on `page` and link it into the
    document's form (Root/AcroForm/Fields).

    Args:
        doc: the PDF document.
        page: the PDF page that receives the widget.
        type: the widget type (a mupdf.PDF_WIDGET_TYPE_* value).
        fieldname: text stored as the field's /T entry.
    Returns:
        The new annotation.

    For signature widgets the SignaturesExist and AppendOnly bits are added
    to Root/AcroForm/SigFlags. If anything fails, the annotation is deleted
    again, the previous SigFlags value is restored (signature widgets only)
    and the exception is re-raised.
    '''
    def store_sigflags(value):
        # Write `value` to Root/AcroForm/SigFlags.
        mupdf.pdf_dict_putl(
                mupdf.pdf_trailer(doc),
                mupdf.pdf_new_int(value),
                PDF_NAME('Root'),
                PDF_NAME('AcroForm'),
                PDF_NAME('SigFlags'),
                )

    old_sigflags = mupdf.pdf_to_int(
            mupdf.pdf_dict_getp(mupdf.pdf_trailer(doc), "Root/AcroForm/SigFlags")
            )
    annot = mupdf.pdf_create_annot_raw(page, mupdf.PDF_ANNOT_WIDGET)
    annot_obj = mupdf.pdf_annot_obj(annot)
    try:
        JM_set_field_type(doc, annot_obj, type)
        mupdf.pdf_dict_put_text_string(annot_obj, PDF_NAME('T'), fieldname)

        if type == mupdf.PDF_WIDGET_TYPE_SIGNATURE:
            store_sigflags(old_sigflags | (SigFlag_SignaturesExist | SigFlag_AppendOnly))

        # pdf_create_annot will have linked the new widget into the page's
        # annot array. We also need it linked into the document's form
        fields = mupdf.pdf_dict_getp(mupdf.pdf_trailer(doc), "Root/AcroForm/Fields")
        if not fields.m_internal:
            fields = mupdf.pdf_new_array(doc, 1)
            mupdf.pdf_dict_putl(
                    mupdf.pdf_trailer(doc),
                    fields,
                    PDF_NAME('Root'),
                    PDF_NAME('AcroForm'),
                    PDF_NAME('Fields'),
                    )
        mupdf.pdf_array_push(fields, annot_obj)  # Cleanup relies on this statement being last
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        mupdf.pdf_delete_annot(page, annot)
        if type == mupdf.PDF_WIDGET_TYPE_SIGNATURE:
            store_sigflags(old_sigflags)
        raise
    return annot
def JM_cropbox(page_obj):
    '''
    Return a PDF page's CropBox.

    With `g_use_extra` set this is delegated to extra.JM_cropbox().
    Otherwise the (inheritable) /CropBox is used, or the MediaBox if that
    rect is infinite or empty. The y coordinates of the result are
    measured from the MediaBox's y1 (y0 = mediabox.y1 - cropbox.y1 and
    y1 = mediabox.y1 - cropbox.y0).
    '''
    if g_use_extra:
        return extra.JM_cropbox(page_obj)

    mediabox = JM_mediabox(page_obj)
    cropbox = mupdf.pdf_to_rect(
            mupdf.pdf_dict_get_inheritable(page_obj, PDF_NAME('CropBox'))
            )
    if mupdf.fz_is_infinite_rect(cropbox) or mupdf.fz_is_empty_rect(cropbox):
        cropbox = mediabox
    # Both new values are computed before either is stored - `cropbox` may
    # be the very same object as `mediabox` here.
    top = mediabox.y1
    cropbox.y0, cropbox.y1 = top - cropbox.y1, top - cropbox.y0
    return cropbox
def JM_cropbox_size(page_obj):
    '''
    Return width and height of a PDF page's CropBox as a point (w, h).
    '''
    box = JM_cropbox(page_obj)
    return mupdf.fz_make_point(
            abs(box.x1 - box.x0),
            abs(box.y1 - box.y0),
            )
def JM_derotate_page_matrix(page):
    '''
    Return the inverse of the matrix given by JM_rotate_page_matrix().
    '''
    rotation = JM_rotate_page_matrix(page)
    return mupdf.fz_invert_matrix(rotation)
def JM_embed_file(
        pdf,
        buf,
        filename,
        ufilename,
        desc,
        compress,
        ):
    '''
    Embed a new file in a PDF (not only /EmbeddedFiles entries).

    Args:
        pdf: the PDF document.
        buf: buffer with the file content.
        filename, ufilename, desc: texts for the /F, /UF and /Desc entries.
        compress: passed on to JM_update_stream().
    Returns:
        The new file specification dictionary (/Type /Filespec). Its /EF/F
        entry is the stream with the content; the buffer's size is stored
        there as /DL, /Length and /Params/Size.
    '''
    filespec = mupdf.pdf_new_dict(pdf, 6)
    mupdf.pdf_dict_put_dict(filespec, PDF_NAME('CI'), 4)
    ef = mupdf.pdf_dict_put_dict(filespec, PDF_NAME('EF'), 4)
    for key, text in (('F', filename), ('UF', ufilename), ('Desc', desc)):
        mupdf.pdf_dict_put_text_string(filespec, PDF_NAME(key), text)
    mupdf.pdf_dict_put(filespec, PDF_NAME('Type'), PDF_NAME('Filespec'))

    # Create the stream with placeholder content, then store the real data.
    placeholder = mupdf.fz_new_buffer_from_copied_data(b'  ')
    stream = mupdf.pdf_add_stream(pdf, placeholder, mupdf.PdfObj(), 0)
    mupdf.pdf_dict_put(ef, PDF_NAME('F'), stream)
    JM_update_stream(pdf, stream, buf, compress)

    size, _ = mupdf.fz_buffer_storage(buf)
    mupdf.pdf_dict_put_int(stream, PDF_NAME('DL'), size)
    mupdf.pdf_dict_put_int(stream, PDF_NAME('Length'), size)
    params = mupdf.pdf_dict_put_dict(stream, PDF_NAME('Params'), 4)
    mupdf.pdf_dict_put_int(params, PDF_NAME('Size'), size)
    return filespec
def JM_embedded_clean(pdf):
    '''
    Perform some cleaning of the PDF catalog:
    (1) remove an empty /Collection entry
    (2) set /PageMode to UseAttachments if Names/EmbeddedFiles/Names exists
    '''
    root = mupdf.pdf_dict_get( mupdf.pdf_trailer( pdf), PDF_NAME('Root'))

    collection = mupdf.pdf_dict_get(root, PDF_NAME('Collection'))
    if collection.m_internal and mupdf.pdf_dict_len(collection) == 0:
        mupdf.pdf_dict_del(root, PDF_NAME('Collection'))

    embedded = mupdf.pdf_dict_getl(
            root,
            PDF_NAME('Names'),
            PDF_NAME('EmbeddedFiles'),
            PDF_NAME('Names'),
            )
    if embedded.m_internal:
        mupdf.pdf_dict_put_name(root, PDF_NAME('PageMode'), "UseAttachments")
def JM_EscapeStrFromBuffer(buff):
    '''
    Return the content of fz_buffer `buff` as a str, decoded with
    PyUnicode_DecodeRawUnicodeEscape() (errors are replaced). A buffer
    without internal object gives the empty string.
    '''
    if buff.m_internal:
        raw = mupdf.fz_buffer_extract_copy(buff)
        return PyUnicode_DecodeRawUnicodeEscape(raw, errors='replace')
    return ''
def JM_ensure_identity(pdf):
    '''
    Store an /ID in the PDF trailer if there is none yet.

    The new /ID is an array of two strings, both made from the same 16
    values delivered by mupdf.fz_memrnd2().
    '''
    trailer_id = mupdf.pdf_dict_get( mupdf.pdf_trailer(pdf), PDF_NAME('ID'))
    if trailer_id.m_internal:
        return
    # Need to convert raw bytes into a str to send to
    # mupdf.pdf_new_string(). chr() seems to work for this.
    rnd = ''.join( chr(value) for value in mupdf.fz_memrnd2(16))
    trailer_id = mupdf.pdf_dict_put_array( mupdf.pdf_trailer( pdf), PDF_NAME('ID'), 2)
    for _ in range(2):
        mupdf.pdf_array_push( trailer_id, mupdf.pdf_new_string( rnd, len(rnd)))
def JM_ensure_ocproperties(pdf):
    '''
    Ensure that the catalog has /OCProperties and return that object.

    A missing /OCProperties is created with an empty /OCGs array and a /D
    dictionary containing empty /ON, /OFF, /Order and /RBGroups arrays.
    '''
    root = mupdf.pdf_dict_get(mupdf.pdf_trailer(pdf), PDF_NAME('Root'))
    ocp = mupdf.pdf_dict_get(root, PDF_NAME('OCProperties'))
    if ocp.m_internal:
        return ocp

    ocp = mupdf.pdf_dict_put_dict(root, PDF_NAME('OCProperties'), 2)
    mupdf.pdf_dict_put_array(ocp, PDF_NAME('OCGs'), 0)
    default_config = mupdf.pdf_dict_put_dict(ocp, PDF_NAME('D'), 5)
    for key in ('ON', 'OFF', 'Order', 'RBGroups'):
        mupdf.pdf_dict_put_array(default_config, PDF_NAME(key), 0)
    return ocp
def JM_expand_fname(name):
    '''
    Map a font name to one of the abbreviations "Cour", "TiRo", "Symb",
    "ZaDb" or "Helv".

    The decision is made on the first two characters ("Co"/"co", "Ti"/"ti",
    "Sy"/"sy", "Za"/"za"); an empty name or any other name gives "Helv".
    '''
    if name:
        for prefixes, abbreviation in (
                (("Co", "co"), "Cour"),
                (("Ti", "ti"), "TiRo"),
                (("Sy", "sy"), "Symb"),
                (("Za", "za"), "ZaDb"),
                ):
            if name.startswith(prefixes):
                return abbreviation
    return "Helv"
def JM_field_type_text(wtype):
    '''
    Return the name of a widget type (a mupdf.PDF_WIDGET_TYPE_* value) as
    a string, "unknown" for any other value.
    '''
    labels = (
            (mupdf.PDF_WIDGET_TYPE_BUTTON, "Button"),
            (mupdf.PDF_WIDGET_TYPE_CHECKBOX, "CheckBox"),
            (mupdf.PDF_WIDGET_TYPE_RADIOBUTTON, "RadioButton"),
            (mupdf.PDF_WIDGET_TYPE_TEXT, "Text"),
            (mupdf.PDF_WIDGET_TYPE_LISTBOX, "ListBox"),
            (mupdf.PDF_WIDGET_TYPE_COMBOBOX, "ComboBox"),
            (mupdf.PDF_WIDGET_TYPE_SIGNATURE, "Signature"),
            )
    for code, label in labels:
        if wtype == code:
            return label
    return "unknown"
def JM_fill_pixmap_rect_with_color(dest, col, b):
    '''
    Fill a pixmap rectangle with a color.

    Args:
        dest: the pixmap to modify.
        col: sequence with one value per sample of a pixel (`dest.n()`
            values are used).
        b: the rectangle; it is clipped to the pixmap's bbox.
    Returns:
        0 if the clipped rectangle is empty (nothing is written), else 1.
    '''
    assert isinstance(dest, mupdf.FzPixmap)
    b = mupdf.fz_intersect_irect(b, mupdf.fz_pixmap_bbox( dest))
    width = b.x1 - b.x0
    height = b.y1 - b.y0
    if width <= 0 or height <= 0:
        return 0

    stride = dest.stride()
    n = dest.n()
    row_start = stride * (b.y0 - dest.y()) + n * (b.x0 - dest.x())
    for _ in range(height):
        s = row_start
        for _ in range(width):
            for i in range(n):
                mupdf.fz_samples_set(dest, s, col[i])
                s += 1
        row_start += stride
    return 1
def JM_find_annot_irt(annot):
    '''
    Return the first annotation whose /IRT key ("In Response To") points to
    annot. Used to remove the response chain of a given annotation.

    Returns None if the page has no such annotation.
    '''
    assert isinstance(annot, mupdf.PdfAnnot)
    annot_obj = mupdf.pdf_annot_obj(annot)
    page = _pdf_annot_page(annot)

    # loop thru MuPDF's internal annots array
    candidate = mupdf.pdf_first_annot(page)
    while 1:
        assert isinstance(candidate, mupdf.PdfAnnot)
        if not candidate.m_internal:
            return None     # end of the page's annotations: nothing found
        irt = mupdf.pdf_dict_gets(mupdf.pdf_annot_obj(candidate), 'IRT')
        # pdf_objcmp() gives a false value for matching objects
        if irt.m_internal and not mupdf.pdf_objcmp(irt, annot_obj):
            return candidate
        candidate = mupdf.pdf_next_annot(candidate)
def JM_font_ascender(font):
    '''
    Return the ascender of `font`: the fixed value 0.8 if quad corrections
    are switched off, else the value reported by MuPDF.
    '''
    assert isinstance(font, mupdf.FzFont)
    return 0.8 if _globals.skip_quad_corrections else mupdf.fz_font_ascender(font)
def JM_font_descender(font):
    '''
    Return the descender of `font`: the fixed value -0.2 if quad
    corrections are switched off, else the value reported by MuPDF.
    '''
    assert isinstance(font, mupdf.FzFont)
    return -0.2 if _globals.skip_quad_corrections else mupdf.fz_font_descender(font)
def JM_is_word_delimiter(ch, delimiters):
    '''
    Check if code point `ch` delimits words.

    This is the case for any code point up to 32, for 160 and for
    0x202a..0x202e (whitespace plus unicodes that switch between
    right-to-left and left-to-right languages), and for every character
    contained in `delimiters` (may be None or empty).
    '''
    if ch <= 32 or ch == 160 or 0x202a <= ch <= 0x202e:
        return True
    if not delimiters:  # no extra delimiters provided
        return False
    char = chr(ch)
    return any(d == char for d in delimiters)
def JM_is_rtl_char(ch):
    '''
    Return True if code point `ch` lies in the range 0x590..0x900
    (both inclusive), else False.
    '''
    return 0x590 <= ch <= 0x900
def JM_font_name(font):
    '''
    Return the name of `font`.

    A prefix of six characters followed by '+' is removed, unless
    `_globals.subset_fontnames` is set.
    '''
    assert isinstance(font, mupdf.FzFont)
    name = mupdf.fz_font_name(font)
    plus = name.find('+')
    # Any position of the first '+' other than 6 (including "not found")
    # means there is no such prefix.
    if _globals.subset_fontnames or plus != 6:
        return name
    return name[plus + 1:]
def JM_gather_fonts(pdf, dict_, fontlist, stream_xref):
    '''
    Store info of the fonts in a /Font resource dictionary in a Python list.

    For every value of `dict_` that is a dictionary, the tuple
    (xref, extension, subtype, name, reference name, encoding, stream_xref)
    is appended to `fontlist`; other values cause a MuPDF warning. The name
    is taken from /BaseFont, or from /Name if that is missing or null. If
    /Encoding is a dictionary, its /BaseEncoding is used. The extension is
    "n/a" for fonts without xref.

    Returns:
        Always 1.
    '''
    rc = 1
    for idx in range(mupdf.pdf_dict_len(dict_)):
        refname = mupdf.pdf_dict_get_key(dict_, idx)
        fontdict = mupdf.pdf_dict_get_val(dict_, idx)
        if not mupdf.pdf_is_dict(fontdict):
            mupdf.fz_warn( f"'{mupdf.pdf_to_name(refname)}' is no font dict ({mupdf.pdf_to_num(fontdict)} 0 R)")
            continue

        subtype = mupdf.pdf_dict_get(fontdict, mupdf.PDF_ENUM_NAME_Subtype)
        name = mupdf.pdf_dict_get(fontdict, mupdf.PDF_ENUM_NAME_BaseFont)
        if not name.m_internal or mupdf.pdf_is_null(name):
            name = mupdf.pdf_dict_get(fontdict, mupdf.PDF_ENUM_NAME_Name)
        encoding = mupdf.pdf_dict_get(fontdict, mupdf.PDF_ENUM_NAME_Encoding)
        if mupdf.pdf_is_dict(encoding):
            encoding = mupdf.pdf_dict_get(encoding, mupdf.PDF_ENUM_NAME_BaseEncoding)
        xref = mupdf.pdf_to_num(fontdict)
        ext = JM_get_fontextension(pdf, xref) if xref else "n/a"

        fontlist.append((
                xref,
                ext,
                mupdf.pdf_to_name(subtype),
                JM_EscapeStrFromStr(mupdf.pdf_to_name(name)),
                mupdf.pdf_to_name(refname),
                mupdf.pdf_to_name(encoding),
                stream_xref,
                ))
    return rc
def JM_gather_forms(doc, dict_: mupdf.PdfObj, imagelist, stream_xref: int):
    '''
    Store info of /Form xobjects in a Python list.

    For every value of `dict_` that is a dictionary with /Subtype /Form,
    the tuple (xref, reference name, stream_xref, bbox) is appended to
    `imagelist`. The bbox is the /BBox transformed by the /Matrix (if
    any), or the infinite rect if there is no /BBox. Values that are no
    dictionary cause a MuPDF warning.

    Returns:
        Always 1.
    '''
    assert isinstance(doc, mupdf.PdfDocument)
    rc = 1
    for idx in range(mupdf.pdf_dict_len(dict_)):
        refname = mupdf.pdf_dict_get_key( dict_, idx)
        formdict = mupdf.pdf_dict_get_val(dict_, idx)
        if not mupdf.pdf_is_dict(formdict):
            mupdf.fz_warn( f"'{mupdf.pdf_to_name(refname)}' is no form dict ({mupdf.pdf_to_num(formdict)} 0 R)")
            continue

        subtype = mupdf.pdf_dict_get(formdict, PDF_NAME('Subtype'))
        if not mupdf.pdf_name_eq(subtype, PDF_NAME('Form')):
            continue

        bbox_obj = mupdf.pdf_dict_get(formdict, PDF_NAME('BBox'))
        matrix_obj = mupdf.pdf_dict_get(formdict, PDF_NAME('Matrix'))
        mat = mupdf.pdf_to_matrix(matrix_obj) if matrix_obj.m_internal else mupdf.FzMatrix()
        if bbox_obj.m_internal:
            bbox = mupdf.fz_transform_rect( mupdf.pdf_to_rect(bbox_obj), mat)
        else:
            bbox = mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE)
        xref = mupdf.pdf_to_num(formdict)

        imagelist.append((
                xref,
                mupdf.pdf_to_name( refname),
                stream_xref,
                JM_py_from_rect(bbox),
                ))
    return rc
def JM_gather_images(doc: mupdf.PdfDocument, dict_: mupdf.PdfObj, imagelist, stream_xref: int):
    '''
    Store info of images in a Python list.

    For every value of `dict_` that is a dictionary with /Subtype /Image,
    this tuple is appended to `imagelist`:
    (xref, object number of /SMask or /Mask (0 if none), width, height,
    bits per component, colorspace name, alternate colorspace name,
    reference name, filter name, stream_xref).
    Of an array-valued filter or colorspace the first item is used; the
    alternate colorspace is only looked up for /DeviceN and /Separation.
    Values that are no dictionary cause a MuPDF warning.

    Returns:
        Always 1.
    '''
    def name_of(obj):
        # Name of a PDF name object as escaped Python string.
        return JM_EscapeStrFromStr(mupdf.pdf_to_name(obj))

    rc = 1
    for idx in range(mupdf.pdf_dict_len( dict_)):
        refname = mupdf.pdf_dict_get_key(dict_, idx)
        imagedict = mupdf.pdf_dict_get_val(dict_, idx)
        if not mupdf.pdf_is_dict(imagedict):
            mupdf.fz_warn(f"'{mupdf.pdf_to_name(refname)}' is no image dict ({mupdf.pdf_to_num(imagedict)} 0 R)")
            continue

        subtype = mupdf.pdf_dict_get(imagedict, PDF_NAME('Subtype'))
        if not mupdf.pdf_name_eq(subtype, PDF_NAME('Image')):
            continue

        xref = mupdf.pdf_to_num(imagedict)
        smask = mupdf.pdf_dict_geta(imagedict, PDF_NAME('SMask'), PDF_NAME('Mask'))
        gen = mupdf.pdf_to_num(smask) if smask.m_internal else 0

        filter_ = mupdf.pdf_dict_geta(imagedict, PDF_NAME('Filter'), PDF_NAME('F'))
        if mupdf.pdf_is_array(filter_):
            filter_ = mupdf.pdf_array_get(filter_, 0)

        altcs = mupdf.PdfObj(0)
        cs = mupdf.pdf_dict_geta(imagedict, PDF_NAME('ColorSpace'), PDF_NAME('CS'))
        if mupdf.pdf_is_array(cs):
            cs_array = cs
            cs = mupdf.pdf_array_get(cs_array, 0)
            if (mupdf.pdf_name_eq(cs, PDF_NAME('DeviceN'))
                    or mupdf.pdf_name_eq(cs, PDF_NAME('Separation'))
                    ):
                altcs = mupdf.pdf_array_get(cs_array, 2)
                if mupdf.pdf_is_array(altcs):
                    altcs = mupdf.pdf_array_get(altcs, 0)

        width = mupdf.pdf_dict_geta(imagedict, PDF_NAME('Width'), PDF_NAME('W'))
        height = mupdf.pdf_dict_geta(imagedict, PDF_NAME('Height'), PDF_NAME('H'))
        bpc = mupdf.pdf_dict_geta(imagedict, PDF_NAME('BitsPerComponent'), PDF_NAME('BPC'))

        imagelist.append((
                xref,
                gen,
                mupdf.pdf_to_int(width),
                mupdf.pdf_to_int(height),
                mupdf.pdf_to_int(bpc),
                name_of(cs),
                name_of(altcs),
                name_of(refname),
                name_of(filter_),
                stream_xref,
                ))
    return rc
def JM_get_annot_by_xref(page, xref):
    '''
    Return the annotation of `page` that has the given xref.

    Raises:
        Exception: if no annotation of the page has this xref.
    '''
    assert isinstance(page, mupdf.PdfPage)
    # loop thru MuPDF's internal annots array
    annot = mupdf.pdf_first_annot(page)
    while annot.m_internal:
        if xref == mupdf.pdf_to_num(mupdf.pdf_annot_obj(annot)):
            return annot
        annot = mupdf.pdf_next_annot( annot)
    raise Exception("xref %d is not an annot of this page" % xref)
def JM_get_annot_by_name(page, name):
    '''
    Return the annotation of `page` whose /NM entry equals `name`.

    Returns None if `name` is empty.

    Raises:
        Exception: if no annotation of the page has this name.
    '''
    assert isinstance(page, mupdf.PdfPage)
    if not name:
        return
    # loop thru MuPDF's internal annots array
    annot = mupdf.pdf_first_annot(page)
    while annot.m_internal:
        nm_obj = mupdf.pdf_dict_gets(mupdf.pdf_annot_obj(annot), "NM")
        response, _ = mupdf.pdf_to_string(nm_obj)
        if name == response:
            return annot
        annot = mupdf.pdf_next_annot(annot)
    raise Exception("'%s' is not an annot of this page" % name)
def JM_get_annot_id_list(page):
    '''
    Return the list of /NM values of the items in the page's /Annots
    array. Items without /NM are left out; a page without /Annots gives an
    empty list.
    '''
    annots = mupdf.pdf_dict_get( page.obj(), mupdf.PDF_ENUM_NAME_Annots)
    if not annots.m_internal:
        return []
    id_objs = (
            mupdf.pdf_dict_gets(mupdf.pdf_array_get(annots, k), "NM")
            for k in range( mupdf.pdf_array_len(annots))
            )
    return [mupdf.pdf_to_text_string(obj) for obj in id_objs if obj.m_internal]
def JM_get_annot_xref_list( page_obj):
    '''
    Return the xrefs, types and /NM ids of a page's annots, links and
    fields.

    With `g_use_extra` set this is delegated to
    extra.JM_get_annot_xref_list(). Otherwise a list of tuples
    (xref, annotation type, id) is built from the /Annots array; items
    without /Subtype or with an unknown type are skipped.
    '''
    if g_use_extra:
        return extra.JM_get_annot_xref_list( page_obj)

    result = []
    annots = mupdf.pdf_dict_get( page_obj, PDF_NAME('Annots'))
    for idx in range( mupdf.pdf_array_len( annots)):
        annot_obj = mupdf.pdf_array_get( annots, idx)
        xref = mupdf.pdf_to_num( annot_obj)
        subtype = mupdf.pdf_dict_get( annot_obj, PDF_NAME('Subtype'))
        if not subtype.m_internal:
            continue    # subtype is required
        annot_type = mupdf.pdf_annot_type_from_string( mupdf.pdf_to_name( subtype))
        if annot_type == mupdf.PDF_ANNOT_UNKNOWN:
            continue    # only accept valid annot types
        nm_obj = mupdf.pdf_dict_gets( annot_obj, "NM")
        result.append( (xref, annot_type, mupdf.pdf_to_text_string( nm_obj)))
    return result
def JM_get_annot_xref_list2(page):
    '''
    Like JM_get_annot_xref_list(), but takes a page whose PDF page is
    obtained via its _pdf_page() method. Returns an empty list if that
    gives no PDF page.
    '''
    pdf_page = page._pdf_page(required=False)
    if pdf_page.m_internal:
        return JM_get_annot_xref_list( pdf_page.obj())
    return list()
def JM_get_border_style(style):
    '''
    Return the PDF name object for a border style given as Python str.

    Only the first character counts, in either case: "b", "d", "i" and "u"
    select the names B, D, I and U. Everything else - including None and
    "s" - gives the name S.
    '''
    if style is not None:
        for initials, attr in (
                (("b", "B"), "PDF_ENUM_NAME_B"),
                (("d", "D"), "PDF_ENUM_NAME_D"),
                (("i", "I"), "PDF_ENUM_NAME_I"),
                (("u", "U"), "PDF_ENUM_NAME_U"),
                ):
            if style.startswith(initials):
                return getattr(mupdf, attr)
    return mupdf.PDF_ENUM_NAME_S
def JM_get_font(
        fontname,
        fontfile,
        fontbuffer,
        script,
        lang,
        ordering,
        is_bold,
        is_italic,
        is_serif,
        embed,
        ):
    '''
    Return a fz_font from a number of parameters.

    The first applicable source is used, in this order:
    1. `fontfile`: font file path.
    2. `fontbuffer`: font file content in memory.
    3. `ordering` > -1: a CJK font.
    4. `fontname`: a Base-14 font or else a MuPDF builtin font
       (`is_bold`, `is_italic` apply to the latter).
    5. A Noto font for `script` / `lang`, if MuPDF has one.
    6. MuPDF's fallback font for `script`, `lang`, `is_serif`, `is_bold`
       and `is_italic`.

    `embed` is passed to fz_set_font_embedding() unless the font is flagged
    as never to be embedded.

    Raises:
        RuntimeError: (MSG_FONT_FAILED) if the chosen source gave no font.
    '''
    def finalize(font):
        # Check that the font exists and apply the embedding request.
        if not font.m_internal:
            raise RuntimeError(MSG_FONT_FAILED)
        # if font allows this, set embedding
        if not font.m_internal.flags.never_embed:
            mupdf.fz_set_font_embedding(font, embed)
        return font

    index = 0
    if fontfile:
        return finalize(mupdf.fz_new_font_from_file( None, fontfile, index, 0))

    if fontbuffer:
        res = JM_BufferFromBytes(fontbuffer)
        return finalize(mupdf.fz_new_font_from_buffer( None, res, index, 0))

    if ordering > -1:
        return finalize(mupdf.fz_new_cjk_font(ordering))

    if fontname:
        # Base-14 or a MuPDF builtin font
        font = mupdf.fz_new_base14_font(fontname)
        if font.m_internal:
            return finalize(font)
        return finalize(mupdf.fz_new_builtin_font(fontname, is_bold, is_italic))

    # Check for NOTO font
    data, size, index = mupdf.fz_lookup_noto_font( script, lang)
    if data:
        # Only look at the font if the lookup delivered data at all - with
        # no data there is no font object to inspect, and we must go on to
        # the fallback font.
        font = mupdf.fz_new_font_from_memory( None, data, size, index, 0)
        if font.m_internal:
            return finalize(font)

    font = mupdf.fz_load_fallback_font( script, lang, is_serif, is_bold, is_italic)
    return finalize(font)
def JM_get_fontbuffer(doc, xref):
    '''
    Return the contents of a font file, identified by xref.

    The font descriptor is taken from the font object itself, or from its
    first descendant font if /DescendantFonts is present. Of the entries
    /FontFile, /FontFile2 and /FontFile3 the last one present is loaded.

    Returns:
        The loaded stream, or None (after a message, except for xref < 1)
        if `xref` is less than 1, the font descriptor is missing, the
        /FontFile3 subtype is no name, or no font file entry exists.
    '''
    if xref < 1:
        return

    o = mupdf.pdf_load_object(doc, xref)
    desft = mupdf.pdf_dict_get(o, PDF_NAME('DescendantFonts'))
    if desft.m_internal:
        obj = mupdf.pdf_resolve_indirect(mupdf.pdf_array_get(desft, 0))
        obj = mupdf.pdf_dict_get(obj, PDF_NAME('FontDescriptor'))
    else:
        obj = mupdf.pdf_dict_get(o, PDF_NAME('FontDescriptor'))

    if not obj.m_internal:
        message("invalid font - FontDescriptor missing")
        return

    descriptor = obj
    stream = None

    obj = mupdf.pdf_dict_get(descriptor, PDF_NAME('FontFile'))
    if obj.m_internal:
        stream = obj    # ext = "pfa"

    obj = mupdf.pdf_dict_get(descriptor, PDF_NAME('FontFile2'))
    if obj.m_internal:
        stream = obj    # ext = "ttf"

    obj = mupdf.pdf_dict_get(descriptor, PDF_NAME('FontFile3'))
    if obj.m_internal:
        stream = obj

        obj = mupdf.pdf_dict_get(obj, PDF_NAME('Subtype'))
        if obj.m_internal and not mupdf.pdf_is_name(obj):
            message("invalid font descriptor subtype")
            return

        known_subtypes = ('Type1C', 'CIDFontType0C', 'OpenType')
        if not any(mupdf.pdf_name_eq(obj, PDF_NAME(st)) for st in known_subtypes):
            # Only a warning - the stream is still returned below.
            message(f'warning: unhandled font type {mupdf.pdf_to_name(obj)!r}')

    if not stream:
        message('warning: unhandled font type')
        return

    return mupdf.pdf_load_stream(stream)
def JM_get_resource_properties(ref):
    '''
    Return the items of Resources/Properties (used for Marked Content).
    Argument may be e.g. a page object or a Form XObject.

    Returns:
        A list of tuples (name, xref), one per item of the dictionary, or
        an empty tuple if the dictionary is missing or empty.
    '''
    properties = mupdf.pdf_dict_getl(ref, PDF_NAME('Resources'), PDF_NAME('Properties'))
    if not properties.m_internal:
        return ()
    count = mupdf.pdf_dict_len(properties)
    if count < 1:
        return ()

    def item(idx):
        # (key name, number of value object) of dictionary item `idx`
        key = mupdf.pdf_dict_get_key(properties, idx)
        val = mupdf.pdf_dict_get_val(properties, idx)
        return mupdf.pdf_to_name(key), mupdf.pdf_to_num(val)

    return [item(idx) for idx in range(count)]
def JM_get_widget_by_xref( page, xref):
    '''
    Return the widget of `page` that has the given xref, wrapped in an
    Annot object.

    Raises:
        Exception: if no widget of the page has this xref.
    '''
    widget = mupdf.pdf_first_widget( page)
    while widget.m_internal:
        if xref == mupdf.pdf_to_num( mupdf.pdf_annot_obj( widget)):
            return Annot( widget)
        widget = mupdf.pdf_next_widget( widget)
    raise Exception( f"xref {xref} is not a widget of this page")
def JM_get_widget_properties(annot, Widget):
    '''
    Populate a Python Widget object with the values from a PDF form field.
    Called by "Page.first_widget" and "Widget.next".

    Args:
        annot: object whose `.this` is the underlying MuPDF widget.
        Widget: object that receives the field attributes.
    '''
    ll_widget = annot.this
    annot_obj = mupdf.pdf_annot_obj(ll_widget)
    pdf = _pdf_annot_page(ll_widget).doc()

    def inherited_nonempty_text(node, key):
        '''
        Modified version of MuPDF's pdf_dict_get_inheritable():
        * Returns the string from pdf_to_text_string(), or None if not found.
        * Continues with the parent if the current node has the key but
          with an empty string value.
        '''
        trailing = node
        countdown = 11  # Don't start moving the trailing pointer for a while.
        while True:
            if not node.m_internal:
                return None
            entry = mupdf.pdf_dict_get(node, key)
            if entry.m_internal:
                text = mupdf.pdf_to_text_string(entry)
                if text:
                    return text
            node = mupdf.pdf_dict_get(node, PDF_NAME('Parent'))
            if node.m_internal == trailing.m_internal:
                raise Exception("cycle in resources")
            countdown -= 1
            if countdown == 0:
                trailing = mupdf.pdf_dict_get(trailing, PDF_NAME('Parent'))
                countdown = 2

    def array_items(convert, arr):
        # Convert every element of PDF array `arr` using `convert`.
        return [
            convert(mupdf.pdf_array_get(arr, pos))
            for pos in range(mupdf.pdf_array_len(arr))
        ]

    field_type = mupdf.pdf_widget_type(ll_widget)
    Widget.field_type = field_type

    # Only signature fields have a signed / unsigned state.
    if field_type == mupdf.PDF_WIDGET_TYPE_SIGNATURE:
        Widget.is_signed = bool(mupdf.pdf_signature_is_signed(pdf, annot_obj))
    else:
        Widget.is_signed = None

    Widget.border_style = JM_UnicodeFromStr(mupdf.pdf_field_border_style(annot_obj))
    Widget.field_type_string = JM_UnicodeFromStr(JM_field_type_text(field_type))
    Widget.field_name = mupdf.pdf_load_field_name(annot_obj)

    # In order to address #3950, empty-string child values of /TU are
    # ignored and the lookup continues with the parent.
    label = inherited_nonempty_text(annot_obj, PDF_NAME('TU'))
    if label is not None:
        Widget.field_label = label

    value = None
    if field_type == mupdf.PDF_WIDGET_TYPE_RADIOBUTTON:
        group = mupdf.pdf_dict_get( annot_obj, PDF_NAME('Parent'))    # owning RB group
        if group.m_internal:
            Widget.rb_parent = mupdf.pdf_to_num(group)
        state = mupdf.pdf_dict_get(annot_obj, PDF_NAME('AS'))
        if state.m_internal:
            value = mupdf.pdf_to_name(state)
    if not value:
        value = mupdf.pdf_field_value(annot_obj)
    Widget.field_value = JM_UnicodeFromStr(value)

    Widget.field_display = mupdf.pdf_field_display(annot_obj)

    width = mupdf.pdf_to_real(mupdf.pdf_dict_getl(annot_obj, PDF_NAME('BS'), PDF_NAME('W')))
    Widget.border_width = 1 if width == 0 else width

    dashes = mupdf.pdf_dict_getl(annot_obj, PDF_NAME('BS'), PDF_NAME('D'))
    if mupdf.pdf_is_array(dashes):
        Widget.border_dashes = array_items(mupdf.pdf_to_int, dashes)

    Widget.text_maxlen = mupdf.pdf_text_widget_max_len(ll_widget)
    Widget.text_format = mupdf.pdf_text_widget_format(ll_widget)

    background = mupdf.pdf_dict_getl(annot_obj, PDF_NAME('MK'), PDF_NAME('BG'))
    if mupdf.pdf_is_array(background):
        Widget.fill_color = array_items(mupdf.pdf_to_real, background)

    border = mupdf.pdf_dict_getl(annot_obj, PDF_NAME('MK'), PDF_NAME('BC'))
    if mupdf.pdf_is_array(border):
        Widget.border_color = array_items(mupdf.pdf_to_real, border)

    Widget.choice_values = JM_choice_options(annot)

    appearance = mupdf.pdf_to_text_string(mupdf.pdf_dict_get_inheritable(annot_obj, PDF_NAME('DA')))
    Widget._text_da = JM_UnicodeFromStr(appearance)

    caption = mupdf.pdf_dict_getl(annot_obj, PDF_NAME('MK'), PDF_NAME('CA'))
    if caption.m_internal:
        Widget.button_caption = JM_UnicodeFromStr(mupdf.pdf_to_text_string(caption))

    Widget.field_flags = mupdf.pdf_field_flags(annot_obj)

    # call Py method to reconstruct text color, font name, size
    Widget._parse_da()

    # extract JavaScript action texts
    Widget.script = JM_get_script(mupdf.pdf_dict_get(annot_obj, PDF_NAME('A')))
    for attr, aa_key in (
            ("script_stroke", PDF_NAME('K')),
            ("script_format", PDF_NAME('F')),
            ("script_change", PDF_NAME('V')),
            ("script_calc", PDF_NAME('C')),
            ("script_blur", mupdf.pdf_new_name('Bl')),
            ("script_focus", mupdf.pdf_new_name('Fo')),
            ):
        setattr(
                Widget,
                attr,
                JM_get_script(mupdf.pdf_dict_getl(annot_obj, PDF_NAME('AA'), aa_key)),
                )
def JM_get_fontextension(doc, xref):
    '''
    Return the file extension of a font file, identified by xref.

    Returns one of "pfa", "ttf", "cff", "cid", "otf", or "n/a" when the xref
    is invalid, no font descriptor / font file entry is found, or the
    FontFile3 subtype is not recognised.
    '''
    if xref < 1:
        return "n/a"
    o = mupdf.pdf_load_object(doc, xref)
    desft = mupdf.pdf_dict_get(o, PDF_NAME('DescendantFonts'))
    if desft.m_internal:
        # Use the descriptor of the first descendant font.
        obj = mupdf.pdf_resolve_indirect(mupdf.pdf_array_get(desft, 0))
        obj = mupdf.pdf_dict_get(obj, PDF_NAME('FontDescriptor'))
    else:
        obj = mupdf.pdf_dict_get(o, PDF_NAME('FontDescriptor'))
    if not obj.m_internal:
        return "n/a"    # this is a base-14 font

    o = obj # we have the FontDescriptor

    obj = mupdf.pdf_dict_get(o, PDF_NAME('FontFile'))
    if obj.m_internal:
        return "pfa"

    obj = mupdf.pdf_dict_get(o, PDF_NAME('FontFile2'))
    if obj.m_internal:
        return "ttf"

    obj = mupdf.pdf_dict_get(o, PDF_NAME('FontFile3'))
    if obj.m_internal:
        obj = mupdf.pdf_dict_get(obj, PDF_NAME('Subtype'))
        if obj.m_internal and not mupdf.pdf_is_name(obj):
            message("invalid font descriptor subtype")
            return "n/a"
        if mupdf.pdf_name_eq(obj, PDF_NAME('Type1C')):
            return "cff"
        elif mupdf.pdf_name_eq(obj, PDF_NAME('CIDFontType0C')):
            return "cid"
        elif mupdf.pdf_name_eq(obj, PDF_NAME('OpenType')):
            return "otf"
        else:
            # message() is given one pre-formatted string here; the
            # previous printf-style second argument was never formatted in.
            message(f"unhandled font type '{mupdf.pdf_to_name(obj)}'")

    return "n/a"
def JM_get_ocg_arrays_imp(arr):
    '''
    Helper for JM_get_ocg_arrays().

    Returns the object numbers of the items of PDF array `arr` as a list,
    in array order and without duplicates. Returns an empty list if `arr`
    is not a PDF array.
    '''
    if not mupdf.pdf_is_array( arr):
        return []
    numbers = (
        mupdf.pdf_to_num( mupdf.pdf_array_get( arr, pos))
        for pos in range(mupdf.pdf_array_len( arr))
    )
    # dict.fromkeys() keeps the first occurrence of each number, in order.
    return list(dict.fromkeys(numbers))
def JM_get_ocg_arrays(conf):
    '''
    Get OCG arrays from OC configuration dictionary `conf`.

    Returns a dict which may contain the keys "on", "off", "locked" (lists
    of object numbers), "rbgroups" (list of such lists) and "basestate"
    (a name). A key is only present if its source entry yields a non-empty
    result ("basestate": if the entry exists).
    '''
    result = {}

    for pdf_key, out_key in (('ON', "on"), ('OFF', "off"), ('Locked', 'locked')):
        numbers = JM_get_ocg_arrays_imp( mupdf.pdf_dict_get( conf, PDF_NAME(pdf_key)))
        if numbers:
            result[out_key] = numbers

    groups = []
    rbgroups = mupdf.pdf_dict_get( conf, PDF_NAME('RBGroups'))
    if mupdf.pdf_is_array( rbgroups):
        for pos in range(mupdf.pdf_array_len( rbgroups)):
            groups.append(JM_get_ocg_arrays_imp( mupdf.pdf_array_get( rbgroups, pos)))
    if groups:
        result["rbgroups"] = groups

    base_state = mupdf.pdf_dict_get( conf, PDF_NAME('BaseState'))
    if base_state.m_internal:
        result["basestate"] = mupdf.pdf_to_name( base_state)

    return result
def JM_get_page_labels(liste, nums):
    '''
    Append (page number, label definition) tuples to list `liste`.

    `nums` is a PDF array read as consecutive pairs: the first item of each
    pair is converted to an int, the second is serialised via
    JM_object_to_buffer() and decoded as UTF-8.
    '''
    count = mupdf.pdf_array_len(nums)
    for pos in range(0, count, 2):
        page_key = mupdf.pdf_resolve_indirect( mupdf.pdf_array_get(nums, pos))
        page_number = mupdf.pdf_to_int(page_key)
        label_obj = mupdf.pdf_resolve_indirect( mupdf.pdf_array_get(nums, pos + 1))
        raw = mupdf.fz_buffer_extract(JM_object_to_buffer(label_obj, 1, 0))
        assert isinstance(raw, bytes)
        liste.append( (page_number, raw.decode('utf-8')))
def JM_get_script(key):
    '''
    JavaScript extractor.

    Returns either the script source or None. Parameter is a PDF action
    dictionary, which must have keys /S and /JS. The value of /S must be
    '/JavaScript'. The value of /JS (a string or a stream) is returned;
    an empty script is reported as None.
    '''
    if not key.m_internal:
        return None

    action_type = mupdf.pdf_to_name(mupdf.pdf_dict_get(key, PDF_NAME('S')))
    if action_type != "JavaScript":
        return None

    source = mupdf.pdf_dict_get(key, PDF_NAME('JS'))
    if not source.m_internal:
        return None

    if mupdf.pdf_is_string(source):
        text = JM_UnicodeFromStr(mupdf.pdf_to_text_string(source))
    elif mupdf.pdf_is_stream(source):
        text = JM_EscapeStrFromBuffer(mupdf.pdf_load_stream(source))
    else:
        return None

    # do not return an empty script
    return text if text else None
def JM_have_operation(pdf):
    '''
    Ensure valid journalling state.

    Returns 0 if the document has a journal and
    `mupdf.pdf_undoredo_step(pdf, 0)` is falsy, else 1.
    '''
    if not pdf.m_internal.journal:
        return 1
    return 1 if mupdf.pdf_undoredo_step(pdf, 0) else 0
def JM_image_extension(type_):
    '''
    Return the extension string for a MuPDF image type, or "n/a" if the
    type is not one of the listed ones.
    '''
    # (mupdf constant name, extension). Constants are looked up one at a
    # time, in this order, only until the first match.
    # Note: FZ_IMAGE_PSD is intentionally not mapped here.
    known_types = (
        ('FZ_IMAGE_FAX', "fax"),
        ('FZ_IMAGE_RAW', "raw"),
        ('FZ_IMAGE_FLATE', "flate"),
        ('FZ_IMAGE_LZW', "lzw"),
        ('FZ_IMAGE_RLD', "rld"),
        ('FZ_IMAGE_BMP', "bmp"),
        ('FZ_IMAGE_GIF', "gif"),
        ('FZ_IMAGE_JBIG2', "jb2"),
        ('FZ_IMAGE_JPEG', "jpeg"),
        ('FZ_IMAGE_JPX', "jpx"),
        ('FZ_IMAGE_JXR', "jxr"),
        ('FZ_IMAGE_PNG', "png"),
        ('FZ_IMAGE_PNM', "pnm"),
        ('FZ_IMAGE_TIFF', "tiff"),
    )
    for constant_name, extension in known_types:
        if type_ == getattr(mupdf, constant_name):
            return extension
    return "n/a"
+# fixme: need to avoid using a global for this. JM_image_reporter() resets it to a list and JM_image_filter() appends (name, quad) tuples to it.
+g_img_info = None
def JM_image_filter(opaque, ctm, name, image):
    '''
    Record one image occurrence in global list `g_img_info`.

    The unit rectangle is transformed by `ctm` and then by global
    `g_img_info_matrix`; the tuple (name, quad) is appended to the list.
    `opaque` and `image` are not used.
    '''
    assert isinstance(ctm, mupdf.FzMatrix)
    unit_quad = mupdf.fz_quad_from_rect(mupdf.FzRect(mupdf.FzRect.Fixed_UNIT))
    placed = mupdf.fz_transform_quad( unit_quad, ctm)
    placed = mupdf.fz_transform_quad( placed, g_img_info_matrix)
    g_img_info.append((name, JM_py_from_quad(placed)))
def JM_image_profile( imagedata, keep_image):
    '''
    Return basic properties of an image provided as bytes or bytearray.
    The function creates an fz_image and optionally returns it.

    Returns None if no data is given, the data is shorter than 8 bytes or
    the image format is not recognised. Otherwise returns a dict of image
    properties; if `keep_image` is true the image object itself is included.
    '''
    if not imagedata:
        return None # nothing given
    size = len( imagedata)
    if size < 8:
        message( "bad image data")
        return None

    image_type = mupdf.fz_recognize_image_format( imagedata)
    if image_type == mupdf.FZ_IMAGE_UNKNOWN:
        return None

    # A copy of the data is made if the caller wants to keep the image.
    make_buffer = (
        mupdf.fz_new_buffer_from_copied_data
        if keep_image
        else mupdf.fz_new_buffer_from_shared_data
    )
    image = mupdf.fz_new_image_from_buffer( make_buffer( imagedata, size))

    matrix = mupdf.fz_image_orientation_matrix( image)
    xres, yres = mupdf.fz_image_resolution(image)
    orientation = mupdf.fz_image_orientation( image)
    cs_name = mupdf.fz_colorspace_name( image.colorspace())

    profile = {
        dictkey_width: image.w(),
        dictkey_height: image.h(),
        "orientation": orientation,
        dictkey_matrix: JM_py_from_matrix(matrix),
        dictkey_xres: xres,
        dictkey_yres: yres,
        dictkey_colorspace: image.n(),
        dictkey_bpc: image.bpc(),
        dictkey_ext: JM_image_extension(image_type),
        dictkey_cs_name: cs_name,
    }
    if keep_image:
        profile[ dictkey_image] = image
    return profile
def JM_image_reporter(page):
    '''
    Return a tuple of (name, quad) entries for the images of `page`.

    Runs MuPDF's page content filter with a sanitize filter whose image
    callback forwards to JM_image_filter(). Results are collected via the
    globals `g_img_info` and `g_img_info_matrix`.
    '''
    global g_img_info
    global g_img_info_matrix

    pdf = page.doc()

    g_img_info_matrix = mupdf.FzMatrix()
    page_box = mupdf.FzRect()
    mupdf.pdf_page_transform(page, page_box, g_img_info_matrix)

    class ImageCollector(mupdf.PdfSanitizeFilterOptions2):
        def __init__(self):
            super().__init__()
            self.use_virtual_image_filter()

        def image_filter(self, ctx, ctm, name, image, scissor):
            JM_image_filter(None, mupdf.FzMatrix(ctm), name, image)

    # Keep a local reference to the collector while the filter runs.
    collector = ImageCollector()
    options = _make_PdfFilterOptions(
            instance_forms=1,
            ascii=1,
            no_update=1,
            sanitize=1,
            sopts=collector,
            )

    g_img_info = []
    mupdf.pdf_filter_page_contents( pdf, page, options)
    found = tuple(g_img_info)
    g_img_info = []
    return found
def JM_fitz_config():
    '''
    Return a dict describing the configuration of the `mupdf` module.

    Each "tofu*" / "base14" entry is True when `mupdf` has no attribute of
    the corresponding TOFU* name; the other entries are the boolean values
    of the respective `mupdf` (or JM_MEMORY) constants.
    '''
    def lacks(symbol):
        return not hasattr(mupdf, symbol)

    # Evaluated up front, in this order, before any other constant is read.
    tofu = lacks('TOFU')
    tofu_base14 = lacks('TOFU_BASE14')
    tofu_cjk = lacks('TOFU_CJK')
    tofu_cjk_ext = lacks('TOFU_CJK_EXT')
    tofu_cjk_lang = lacks('TOFU_CJK_LANG')
    tofu_emoji = lacks('TOFU_EMOJI')
    tofu_historic = lacks('TOFU_HISTORIC')
    tofu_sil = lacks('TOFU_SIL')
    tofu_symbol = lacks('TOFU_SYMBOL')

    return {
        "base14": tofu_base14,
        "cbz": bool(mupdf.FZ_ENABLE_CBZ),
        "epub": bool(mupdf.FZ_ENABLE_EPUB),
        "html": bool(mupdf.FZ_ENABLE_HTML),
        "icc": bool(mupdf.FZ_ENABLE_ICC),
        "img": bool(mupdf.FZ_ENABLE_IMG),
        "jpx": bool(mupdf.FZ_ENABLE_JPX),
        "js": bool(mupdf.FZ_ENABLE_JS),
        "pdf": bool(mupdf.FZ_ENABLE_PDF),
        "plotter-cmyk": bool(mupdf.FZ_PLOTTERS_CMYK),
        "plotter-g": bool(mupdf.FZ_PLOTTERS_G),
        "plotter-n": bool(mupdf.FZ_PLOTTERS_N),
        "plotter-rgb": bool(mupdf.FZ_PLOTTERS_RGB),
        "py-memory": bool(JM_MEMORY),
        "svg": bool(mupdf.FZ_ENABLE_SVG),
        "tofu": tofu,
        "tofu-cjk": tofu_cjk,
        "tofu-cjk-ext": tofu_cjk_ext,
        "tofu-cjk-lang": tofu_cjk_lang,
        "tofu-emoji": tofu_emoji,
        "tofu-historic": tofu_historic,
        "tofu-sil": tofu_sil,
        "tofu-symbol": tofu_symbol,
        "xps": bool(mupdf.FZ_ENABLE_XPS),
    }
def JM_insert_contents(pdf, pageref, newcont, overlay):
    '''
    Insert a buffer as a new separate /Contents object of a page.

    1. Create a new stream object from buffer 'newcont'.
    2. If /Contents already is an array, append (overlay) or prepend
       (no overlay) the new object.
    3. Else, create a new array holding the old content object (if any)
       and the new object in the order given by 'overlay', and store it as
       the page's /Contents.

    Returns the xref of the new stream object.
    '''
    old_contents = mupdf.pdf_dict_get(pageref, PDF_NAME('Contents'))
    new_stream = mupdf.pdf_add_stream(pdf, newcont, mupdf.PdfObj(), 0)
    xref = mupdf.pdf_to_num(new_stream)

    if mupdf.pdf_is_array(old_contents):
        if overlay:
            mupdf.pdf_array_push(old_contents, new_stream)
        else:
            mupdf.pdf_array_insert(old_contents, new_stream, 0)
        return xref

    previous = [old_contents] if old_contents.m_internal else []
    ordered = previous + [new_stream] if overlay else [new_stream] + previous
    content_array = mupdf.pdf_new_array(pdf, 5)
    for item in ordered:
        mupdf.pdf_array_push(content_array, item)
    mupdf.pdf_dict_put(pageref, PDF_NAME('Contents'), content_array)
    return xref
def JM_insert_font(pdf, bfname, fontfile, fontbuffer, set_simple, idx, wmode, serif, encoding, ordering):
    '''
    Insert a font in a PDF.

    The font source is chosen in this order: a CJK font (if `ordering` > -1
    and MuPDF finds one), a Base-14 font named `bfname`, else the font in
    `fontfile` or `fontbuffer`.

    Returns a list [xref, info] where `info` is a dict with keys "name",
    "type", "ext", "simple", "ordering", "ascender" and "descender".
    '''
    ENSURE_OPERATION(pdf)

    font_data = None
    extension = None

    # check for CJK font
    if ordering > -1:
        font_data, size, index = mupdf.fz_lookup_cjk_font(ordering)

    if font_data:
        font = mupdf.fz_new_font_from_memory(None, font_data, size, index, 0)
        font_obj = mupdf.pdf_add_cjk_font(pdf, font, ordering, wmode, serif)
        extension = "n/a"
        is_simple = False
    else:
        # check for PDF Base-14 font
        if bfname:
            font_data, size = mupdf.fz_lookup_base14_font(bfname)
        if font_data:
            font = mupdf.fz_new_font_from_memory(bfname, font_data, size, 0, 0)
            font_obj = mupdf.pdf_add_simple_font(pdf, font, encoding)
            extension = "n/a"
            is_simple = True
        else:
            # font comes from a file or from a memory buffer
            if fontfile:
                font = mupdf.fz_new_font_from_file(None, fontfile, idx, 0)
            else:
                font_buffer = JM_BufferFromBytes(fontbuffer)
                if not font_buffer.m_internal:
                    RAISEPY(MSG_FILE_OR_BUFFER, PyExc_ValueError)
                font = mupdf.fz_new_font_from_buffer(None, font_buffer, idx, 0)

            if set_simple:
                font_obj = mupdf.pdf_add_simple_font(pdf, font, encoding)
                is_simple = True
            else:
                font_obj = mupdf.pdf_add_cid_font(pdf, font)
                is_simple = False

    xref = mupdf.pdf_to_num(font_obj)
    base_name = JM_EscapeStrFromStr( mupdf.pdf_to_name( mupdf.pdf_dict_get(font_obj, PDF_NAME('BaseFont'))))
    subtype = JM_UnicodeFromStr( mupdf.pdf_to_name( mupdf.pdf_dict_get( font_obj, PDF_NAME('Subtype'))))

    if not extension:
        extension = JM_UnicodeFromStr(JM_get_fontextension(pdf, xref))

    ascender = mupdf.fz_font_ascender(font)
    descender = mupdf.fz_font_descender(font)

    info = {
        "name": base_name,          # base font name
        "type": subtype,            # subtype
        "ext": extension,           # file extension
        "simple": is_simple,        # simple font?
        "ordering": ordering,       # CJK font?
        "ascender": ascender,
        "descender": descender,
    }
    return [xref, info]
def JM_irect_from_py(r):
    '''
    PySequence to mupdf.FzIrect. Default: infinite irect.

    Accepts mupdf.FzIrect (returned unchanged), IRect, Rect, mupdf.FzRect or
    a sequence of 4 numbers. Sequence items are clamped to the range
    FZ_MIN_INF_RECT..FZ_MAX_INF_RECT. Anything else, or a sequence
    containing None, gives the infinite irect.
    '''
    if isinstance(r, mupdf.FzIrect):
        return r
    if isinstance(r, IRect):
        return mupdf.FzIrect( r.x0, r.y0, r.x1, r.y1)
    if isinstance(r, Rect):
        # Uses fz_irect_from_rect().
        return mupdf.FzIrect(mupdf.FzRect(r.x0, r.y0, r.x1, r.y1))
    if isinstance(r, mupdf.FzRect):
        # Uses fz_irect_from_rect().
        return mupdf.FzIrect(r)

    if not r or not PySequence_Check(r) or PySequence_Size(r) != 4:
        return mupdf.FzIrect(mupdf.fz_infinite_irect)

    coords = []
    for pos in range(4):
        value = r[pos]
        if value is None:
            return mupdf.FzIrect(mupdf.fz_infinite_irect)
        if value < FZ_MIN_INF_RECT:
            value = FZ_MIN_INF_RECT
        if value > FZ_MAX_INF_RECT:
            value = FZ_MAX_INF_RECT
        coords.append(value)
    x0, y0, x1, y1 = coords
    return mupdf.fz_make_irect(x0, y0, x1, y1)
def JM_listbox_value( annot):
    '''
    ListBox retrieve value.

    Returns the text of the annotation's /V entry if it is a single string,
    otherwise a list of strings (possibly empty) built from the array items.
    '''
    def item_text(item):
        # each entry may again be an array: take second entry then
        if mupdf.pdf_is_array( item):
            item = mupdf.pdf_array_get( item, 1)
        return JM_UnicodeFromStr( mupdf.pdf_to_text_string( item))

    # may be single value or array
    selected = mupdf.pdf_dict_get( mupdf.pdf_annot_obj( annot), PDF_NAME('V'))
    if mupdf.pdf_is_string( selected):   # a single string
        return mupdf.pdf_to_text_string( selected)

    # value is an array (may have len 0)
    return [
        item_text(mupdf.pdf_array_get( selected, pos))
        for pos in range( mupdf.pdf_array_len( selected))
    ]
def JM_make_annot_DA(annot, ncol, col, fontname, fontsize):
    '''
    Build a default-appearance string from color and font information and
    store it as text string under the DA key of the annotation object.

    `ncol` is the number of color components in `col`: less than 1 gives
    "0 g", 1 gives gray ("g"), 3 gives RGB ("rg"), 4 or more gives CMYK
    ("k"). A value of 2 is not supported (assertion).
    '''
    # The string is built directly in Python; see the note in the original
    # implementation about fz_buffer / `const char*` conversion.
    color_op = ''
    if ncol < 1:
        color_op = '0 g '
    elif ncol == 1:
        color_op = f'{col[0]:g} g '
    elif ncol == 2:
        assert 0
    elif ncol == 3:
        color_op = f'{col[0]:g} {col[1]:g} {col[2]:g} rg '
    else:
        color_op = f'{col[0]:g} {col[1]:g} {col[2]:g} {col[3]:g} k '
    font_op = f'/{JM_expand_fname(fontname)} {fontsize} Tf'
    mupdf.pdf_dict_put_text_string(
            mupdf.pdf_annot_obj(annot),
            mupdf.PDF_ENUM_NAME_DA,
            color_op + font_op,
            )
def JM_make_spanlist(line_dict, line, raw, buff, tp_rect):
    '''
    Split the characters of a text line into spans of identical style.

    Args:
        line_dict: dict that receives the list of spans under dictkey_spans.
        line: iterable of the line's characters.
        raw: if true, each span gets a list of per-character dicts
            (dictkey_chars), else the span text (dictkey_text).
        buff: MuPDF buffer used as scratch space for span text.
        tp_rect: clip rectangle; characters not overlapping it are skipped
            unless it is the infinite rectangle.

    Returns:
        The union rectangle of all spans that were output.
    '''
    if g_use_extra:
        return extra.JM_make_spanlist(line_dict, line, raw, buff, tp_rect)
    char_list = None
    span_list = []
    mupdf.fz_clear_buffer(buff)
    span_rect = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY)
    line_rect = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY)

    class char_style:
        # Style properties of a character; a change in size, flags,
        # char_flags, argb, font or bidi starts a new span.
        def __init__(self, rhs=None):
            if rhs:
                self.size = rhs.size
                self.flags = rhs.flags
                if mupdf_version_tuple >= (1, 25, 2):
                    self.char_flags = rhs.char_flags
                self.font = rhs.font
                self.argb = rhs.argb
                self.asc = rhs.asc
                self.desc = rhs.desc
                self.bidi = rhs.bidi
            else:
                self.size = -1
                self.flags = -1
                if mupdf_version_tuple >= (1, 25, 2):
                    self.char_flags = -1
                self.font = ''
                self.argb = -1
                self.asc = 0
                self.desc = 0
                self.bidi = 0

        def __str__(self):
            ret = f'{self.size} {self.flags}'
            if mupdf_version_tuple >= (1, 25, 2):
                ret += f' {self.char_flags}'
            # The color is stored in attribute `argb`; there is no `color`.
            ret += f' {self.font} {self.argb} {self.asc} {self.desc}'
            return ret

    old_style = char_style()
    style = char_style()
    span = None
    span_origin = None

    for ch in line:
        r = JM_char_bbox(line, ch)
        if (not JM_rects_overlap(tp_rect, r)
                and not mupdf.fz_is_infinite_rect(tp_rect)
                ):
            continue

        # Info from:
        # detect_super_script()
        # fz_font_is_italic()
        # fz_font_is_serif()
        # fz_font_is_monospaced()
        # fz_font_is_bold()
        flags = JM_char_font_flags(mupdf.FzFont(mupdf.ll_fz_keep_font(ch.m_internal.font)), line, ch)
        origin = mupdf.FzPoint(ch.m_internal.origin)
        style.size = ch.m_internal.size
        style.flags = flags
        if mupdf_version_tuple >= (1, 25, 2):
            # FZ_STEXT_SYNTHETIC is per-char, not per-span.
            style.char_flags = ch.m_internal.flags & ~mupdf.FZ_STEXT_SYNTHETIC
        style.font = JM_font_name(mupdf.FzFont(mupdf.ll_fz_keep_font(ch.m_internal.font)))
        style.argb = ch.m_internal.argb
        style.asc = JM_font_ascender(mupdf.FzFont(mupdf.ll_fz_keep_font(ch.m_internal.font)))
        style.desc = JM_font_descender(mupdf.FzFont(mupdf.ll_fz_keep_font(ch.m_internal.font)))
        style.bidi = ch.m_internal.bidi

        if (style.size != old_style.size
                or style.flags != old_style.flags
                or (mupdf_version_tuple >= (1, 25, 2)
                    and (style.char_flags != old_style.char_flags)
                    )
                or style.argb != old_style.argb
                or style.font != old_style.font
                or style.bidi != old_style.bidi
                ):
            if old_style.size >= 0:
                # not first one, output previous
                if raw:
                    # put character list in the span
                    span[dictkey_chars] = char_list
                    char_list = None
                else:
                    # put text string in the span
                    span[dictkey_text] = JM_EscapeStrFromBuffer( buff)
                    mupdf.fz_clear_buffer(buff)

                span[dictkey_origin] = JM_py_from_point(span_origin)
                span[dictkey_bbox] = JM_py_from_rect(span_rect)
                line_rect = mupdf.fz_union_rect(line_rect, span_rect)
                span_list.append( span)
                span = None

            span = dict()
            asc = style.asc
            desc = style.desc
            if style.asc < 1e-3:
                # Ascender is (nearly) zero or negative: use fixed values.
                asc = 0.9
                desc = -0.1

            span[dictkey_size] = style.size
            span[dictkey_flags] = style.flags
            span[dictkey_bidi] = style.bidi
            if mupdf_version_tuple >= (1, 25, 2):
                span[dictkey_char_flags] = style.char_flags
            span[dictkey_font] = JM_EscapeStrFromStr(style.font)
            span[dictkey_color] = style.argb & 0xffffff
            if mupdf_version_tuple >= (1, 25, 0):
                span['alpha'] = style.argb >> 24
            span["ascender"] = asc
            span["descender"] = desc

            # Need to be careful here - doing 'old_style=style' does a shallow
            # copy, but we need to keep old_style as a distinct instance.
            old_style = char_style(style)
            span_rect = r
            span_origin = origin

        span_rect = mupdf.fz_union_rect(span_rect, r)

        if raw: # make and append a char dict
            char_dict = dict()
            char_dict[dictkey_origin] = JM_py_from_point( ch.m_internal.origin)
            char_dict[dictkey_bbox] = JM_py_from_rect(r)
            char_dict[dictkey_c] = chr(ch.m_internal.c)
            char_dict['synthetic'] = bool(ch.m_internal.flags & mupdf.FZ_STEXT_SYNTHETIC)

            if char_list is None:
                char_list = []
            char_list.append(char_dict)
        else:   # add character byte to buffer
            JM_append_rune(buff, ch.m_internal.c)

    # all characters processed, now flush remaining span
    if span:
        if raw:
            span[dictkey_chars] = char_list
            char_list = None
        else:
            span[dictkey_text] = JM_EscapeStrFromBuffer(buff)
            mupdf.fz_clear_buffer(buff)
        span[dictkey_origin] = JM_py_from_point(span_origin)
        span[dictkey_bbox] = JM_py_from_rect(span_rect)

        # Unlike spans output inside the loop, the last span is dropped if
        # its rectangle is empty.
        if not mupdf.fz_is_empty_rect(span_rect):
            span_list.append(span)
            line_rect = mupdf.fz_union_rect(line_rect, span_rect)
        span = None

    # The span list is stored whether or not the line rectangle is empty.
    line_dict[dictkey_spans] = span_list
    return line_rect
def _make_image_dict(img, img_dict):
    '''
    Populate a dictionary with information extracted from a given image.

    Used by 'Document.extract_image' and by 'JM_make_image_block'.
    Both of these functions will add some more specific information.

    The image data is stored as PNG if there is no compressed buffer or the
    type is JBIG2, unknown or ranks below FZ_IMAGE_BMP; as a newly created
    JPEG for 4-component JPEG images; else as the compressed buffer itself.
    '''
    image_type = img.fz_compressed_image_type()
    extension = JM_image_extension(image_type)

    # compressed image buffer if present, else None
    compressed = mupdf.ll_fz_compressed_image_buffer(img.m_internal)

    needs_png = (
        not compressed
        or image_type in (mupdf.FZ_IMAGE_JBIG2, mupdf.FZ_IMAGE_UNKNOWN)
        or image_type < mupdf.FZ_IMAGE_BMP
    )
    if needs_png:
        # not an image with a compressed buffer: convert to PNG
        data_buffer = mupdf.fz_new_buffer_from_image_as_png(
                img,
                mupdf.FzColorParams(mupdf.fz_default_color_params),
                )
        extension = "png"
    elif extension == "jpeg" and img.n() == 4:
        # JPEG with CMYK: invert colors
        data_buffer = mupdf.fz_new_buffer_from_image_as_jpeg(
                img, mupdf.FzColorParams(mupdf.fz_default_color_params), 95, 1)
    else:
        # copy the compressed buffer
        data_buffer = mupdf.FzBuffer(mupdf.ll_fz_keep_buffer(compressed.buffer))

    image_bytes = JM_BinFromBuffer(data_buffer)
    img_dict.update({
        dictkey_width: img.w(),
        dictkey_height: img.h(),
        dictkey_ext: extension,
        dictkey_colorspace: img.n(),
        dictkey_xres: img.xres(),
        dictkey_yres: img.yres(),
        dictkey_bpc: img.bpc(),
        dictkey_size: len(image_bytes),
        dictkey_image: image_bytes,
    })
def JM_make_image_block(block, block_dict):
    '''
    Fill `block_dict` with the data of the image of stext block `block`:
    the entries made by _make_image_dict(), the image mask as PNG bytes
    (None if there is no mask) and the block's transformation matrix.
    '''
    image = block.i_image()
    _make_image_dict(image, block_dict)

    # if the image has a mask, store it as a PNG buffer
    mask_bytes = None
    mask = image.mask()
    if mask.m_internal:
        png = mask.fz_new_buffer_from_image_as_png(mupdf.FzColorParams(mupdf.fz_default_color_params))
        mask_bytes = png.fz_buffer_extract()
    block_dict["mask"] = mask_bytes

    block_dict[dictkey_matrix] = JM_py_from_matrix(block.i_transform())
def JM_make_text_block(block, block_dict, raw, buff, tp_rect):
    '''
    Fill `block_dict` with the lines of text block `block`.

    Lines whose bbox does not intersect `tp_rect` are skipped unless
    `tp_rect` is infinite. Stores the list of line dicts (dictkey_lines)
    and the union of the line rectangles (dictkey_bbox).
    '''
    if g_use_extra:
        return extra.JM_make_text_block(block.m_internal, block_dict, raw, buff.m_internal, tp_rect.m_internal)

    lines = []
    bbox = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY)
    for line in block:
        outside = mupdf.fz_is_empty_rect(
                mupdf.fz_intersect_rect(tp_rect, mupdf.FzRect(line.m_internal.bbox))
                )
        if outside and not mupdf.fz_is_infinite_rect(tp_rect):
            continue

        entry = {}
        span_rect = JM_make_spanlist(entry, line, raw, buff, tp_rect)
        bbox = mupdf.fz_union_rect(bbox, span_rect)
        entry[dictkey_wmode] = line.m_internal.wmode
        entry[dictkey_dir] = JM_py_from_point(line.m_internal.dir)
        entry[dictkey_bbox] = JM_py_from_rect(span_rect)
        lines.append(entry)

    block_dict[dictkey_bbox] = JM_py_from_rect(bbox)
    block_dict[dictkey_lines] = lines
def JM_make_textpage_dict(tp, page_dict, raw):
    '''
    Store the list of block dicts of text page `tp` in `page_dict`
    (key dictkey_blocks).

    Unless the text page's mediabox is infinite, image blocks not fully
    contained in it and any blocks not intersecting it are skipped. Block
    numbers count all blocks, including skipped ones.
    '''
    if g_use_extra:
        return extra.JM_make_textpage_dict(tp.m_internal, page_dict, raw)

    scratch = mupdf.fz_new_buffer(128)
    blocks = []
    clip = mupdf.FzRect(tp.m_internal.mediabox)

    for number, block in enumerate(tp):
        ll_block = block.m_internal
        if (not mupdf.fz_contains_rect(clip, mupdf.FzRect(ll_block.bbox))
                and not mupdf.fz_is_infinite_rect(clip)
                and ll_block.type == mupdf.FZ_STEXT_BLOCK_IMAGE
                ):
            continue
        if (not mupdf.fz_is_infinite_rect(clip)
                and mupdf.fz_is_empty_rect(mupdf.fz_intersect_rect(clip, mupdf.FzRect(ll_block.bbox)))
                ):
            continue

        entry = {dictkey_number: number, dictkey_type: ll_block.type}
        if ll_block.type == mupdf.FZ_STEXT_BLOCK_IMAGE:
            entry[dictkey_bbox] = JM_py_from_rect(ll_block.bbox)
            JM_make_image_block(block, entry)
        else:
            JM_make_text_block(block, entry, raw, scratch, clip)
        blocks.append(entry)

    page_dict[dictkey_blocks] = blocks
def JM_matrix_from_py(m):
    '''
    Convert `m` to a mupdf.FzMatrix.

    Accepts a mupdf.FzMatrix (returned unchanged), a Matrix, or a sequence
    of 6 numbers. Any other input, or a sequence with an item for which
    JM_FLOAT_ITEM() returns None, gives a default-constructed
    mupdf.FzMatrix.
    '''
    if isinstance(m, mupdf.FzMatrix):
        return m
    if isinstance(m, Matrix):
        return mupdf.FzMatrix(m.a, m.b, m.c, m.d, m.e, m.f)
    if not m or not PySequence_Check(m) or PySequence_Size(m) != 6:
        return mupdf.FzMatrix()
    a = [0, 0, 0, 0, 0, 0]
    for i in range(6):
        a[i] = JM_FLOAT_ITEM(m, i)
        if a[i] is None:
            # Always return a matrix: this path used to return an FzRect,
            # which is the wrong type for callers expecting a matrix.
            return mupdf.FzMatrix()
    return mupdf.FzMatrix(a[0], a[1], a[2], a[3], a[4], a[5])
def JM_mediabox(page_obj):
    '''
    Return a PDF page's MediaBox.

    An empty or infinite (inheritable) MediaBox is replaced by
    (0, 0, 612, 792). Coordinates are normalised so that x0 <= x1 and
    y0 <= y1. If the result is less than 1 wide or high, the unit
    rectangle is returned.
    '''
    box = mupdf.pdf_to_rect(
            mupdf.pdf_dict_get_inheritable(page_obj, PDF_NAME('MediaBox'))
            )
    if mupdf.fz_is_empty_rect(box) or mupdf.fz_is_infinite_rect(box):
        box.x0 = 0
        box.y0 = 0
        box.x1 = 612
        box.y1 = 792

    normalised = mupdf.FzRect(
            mupdf.fz_min(box.x0, box.x1),
            mupdf.fz_min(box.y0, box.y1),
            mupdf.fz_max(box.x0, box.x1),
            mupdf.fz_max(box.y0, box.y1),
            )
    too_narrow = normalised.x1 - normalised.x0 < 1
    if too_narrow or normalised.y1 - normalised.y0 < 1:
        return mupdf.FzRect(mupdf.FzRect.Fixed_UNIT)
    return normalised
def JM_merge_range(
        doc_des,
        doc_src,
        spage,
        epage,
        apage,
        rotate,
        links,
        annots,
        show_progress,
        graft_map,
        ):
    '''
    Copy a range of pages (spage, epage) from a source PDF to a specified
    location (apage) of the target PDF.
    If spage > epage, the sequence of source pages is reversed.

    If `show_progress` > 0, a message is issued after every
    `show_progress` copied pages.
    '''
    if g_use_extra:
        return extra.JM_merge_range(
                doc_des,
                doc_src,
                spage,
                epage,
                apage,
                rotate,
                links,
                annots,
                show_progress,
                graft_map,
                )

    total = mupdf.fz_absi(epage - spage) + 1   # total pages to copy
    if spage < epage:
        source_pages = range(spage, epage + 1)
    else:
        source_pages = range(spage, epage - 1, -1)

    # `copied` counts pages including the current one; the target position
    # advances by one for every page.
    for copied, page in enumerate(source_pages, start=1):
        page_merge(doc_des, doc_src, page, apage + copied - 1, rotate, links, annots, graft_map)
        if show_progress > 0 and copied % show_progress == 0:
            message(f"Inserted {copied} of {total} pages.")
def JM_merge_resources( page, temp_res):
    '''
    Merge the /Resources object created by a text pdf device into the page.

    The device may have created multiple /ExtGState/Alp? and /Font/F? objects.
    These need to be renamed (renumbered) to not overwrite existing page
    objects from previous executions.

    Returns the tuple (max_alp, max_fonts): the numbers that were added to
    the device's /Alp and /F object numbers.
    '''
    def highest_number(container, prefix):
        # Largest number following `prefix` among the keys of dict
        # `container`, or -1 if there is no such key.
        best = -1
        for pos in range(mupdf.pdf_dict_len(container)):
            key_name = mupdf.pdf_to_name( mupdf.pdf_dict_get_key(container, pos))
            if not key_name.startswith(prefix):
                continue
            number = mupdf.fz_atoi(key_name[len(prefix):])
            if number > best:
                best = number
        return best

    def copy_renumbered(source, target, prefix, offset):
        # Copy all entries of `source` to `target`, adding `offset` to the
        # number that follows the first len(prefix) characters of each key.
        for pos in range(mupdf.pdf_dict_len(source)):
            key_name = mupdf.pdf_to_name( mupdf.pdf_dict_get_key(source, pos))
            number = mupdf.fz_atoi(key_name[len(prefix):]) + offset
            entry = mupdf.pdf_dict_get_val(source, pos)
            mupdf.pdf_dict_puts(target, f'{prefix}{number}', entry)

    # page objects /Resources, /Resources/ExtGState, /Resources/Font
    resources = mupdf.pdf_dict_get(page.obj(), PDF_NAME('Resources'))
    if not resources.m_internal:
        resources = mupdf.pdf_dict_put_dict(page.obj(), PDF_NAME('Resources'), 5)
    page_extg = mupdf.pdf_dict_get(resources, PDF_NAME('ExtGState'))
    page_fonts = mupdf.pdf_dict_get(resources, PDF_NAME('Font'))

    # text pdf device objects /ExtGState, /Font
    device_extg = mupdf.pdf_dict_get(temp_res, PDF_NAME('ExtGState'))
    device_fonts = mupdf.pdf_dict_get(temp_res, PDF_NAME('Font'))

    max_alp = -1
    max_fonts = -1

    # Handle /Alp objects
    if mupdf.pdf_is_dict(device_extg):   # any created at all?
        if mupdf.pdf_is_dict(page_extg):   # does page have /ExtGState yet?
            max_alp = highest_number(page_extg, 'Alp')
        else:   # create a /ExtGState for the page
            page_extg = mupdf.pdf_dict_put_dict(
                    resources,
                    PDF_NAME('ExtGState'),
                    mupdf.pdf_dict_len(device_extg),
                    )
        max_alp += 1
        copy_renumbered(device_extg, page_extg, 'Alp', max_alp)

    # Handle fonts
    if mupdf.pdf_is_dict(page_fonts):  # has page any fonts yet?
        max_fonts = highest_number(page_fonts, "F")
    else:   # create a Resources/Font for the page
        page_fonts = mupdf.pdf_dict_put_dict(resources, PDF_NAME('Font'), 2)
    max_fonts += 1
    copy_renumbered(device_fonts, page_fonts, 'F', max_fonts)

    return (max_alp, max_fonts)
def JM_mupdf_warning( text):
    '''
    Record a MuPDF warning and optionally show it.

    The text is always appended to `JM_mupdf_warnings_store`. It is passed
    on to `message()` only while `JM_mupdf_show_warnings` is true.
    '''
    JM_mupdf_warnings_store.append(text)
    if not JM_mupdf_show_warnings:
        return
    message(f'MuPDF warning: {text}')
def JM_mupdf_error( text):
    '''
    Record a MuPDF error and optionally show it.

    The text is appended to `JM_mupdf_warnings_store` (the same store that
    warnings use). It is passed on to `message()` only while
    `JM_mupdf_show_errors` is true.
    '''
    JM_mupdf_warnings_store.append(text)
    if not JM_mupdf_show_errors:
        return
    message(f'MuPDF error: {text}\n')
def JM_new_bbox_device(rc, inc_layers):
    '''
    Create a `JM_new_bbox_device_Device` for the given arguments.

    rc: must be a Python list (asserted); forwarded to the device.
    inc_layers: forwarded unchanged to the device constructor.
    '''
    assert isinstance(rc, list)
    device = JM_new_bbox_device_Device( rc, inc_layers)
    return device
def JM_new_buffer_from_stext_page(page):
    '''
    Collect the text of an stext page in a new buffer.

    Only text blocks are visited. A character is appended when its bbox
    overlaps the page mediabox or when the mediabox is infinite. One
    newline byte is appended after every line and one more after every
    text block.
    '''
    assert isinstance(page, mupdf.FzStextPage)
    mediabox = mupdf.FzRect(page.m_internal.mediabox)
    out = mupdf.fz_new_buffer(256)
    newline = ord('\n')
    for block in page:
        if block.m_internal.type != mupdf.FZ_STEXT_BLOCK_TEXT:
            continue
        for line in block:
            for ch in line:
                if (JM_rects_overlap(mediabox, JM_char_bbox(line, ch))
                        or mupdf.fz_is_infinite_rect(mediabox)
                        ):
                    mupdf.fz_append_rune(out, ch.m_internal.c)
            mupdf.fz_append_byte(out, newline)
        mupdf.fz_append_byte(out, newline)
    return out
def JM_new_javascript(pdf, value):
    '''
    Build a new PDF JavaScript action object from script source.

    pdf: the PDF document that receives the new objects.
    value: the script source; None yields None.

    Returns the new action dictionary (with /S /JavaScript and a /JS
    stream), or None if `value` is None or `JM_StrAsChar(value)` is None.
    '''
    if value is None:
        # no script given
        return None
    script_text = JM_StrAsChar(value)
    if script_text is None:
        # could not be converted to a string
        return None
    script_buffer = mupdf.fz_new_buffer_from_copied_data(script_text.encode('utf8'))
    js_stream = mupdf.pdf_add_stream(pdf, script_buffer, mupdf.PdfObj(), 0)
    action = mupdf.pdf_add_new_dict(pdf, 4)
    mupdf.pdf_dict_put(action, PDF_NAME('S'), mupdf.pdf_new_name('JavaScript'))
    mupdf.pdf_dict_put(action, PDF_NAME('JS'), js_stream)
    return action
def JM_new_output_fileptr(bio):
    '''
    Wrap `bio` in a `JM_new_output_fileptr_Output` and return it.
    '''
    output = JM_new_output_fileptr_Output( bio)
    return output
def JM_norm_rotation(rotate):
    '''
    Return a normalized /Rotate value: one of 0, 90, 180, 270.

    The value is first reduced into the range [0, 360). If the result is
    not a multiple of 90, 0 is returned instead.

    The reduction uses the modulo operator instead of adding / subtracting
    360 in a loop, so it needs constant time even for very large values.
    '''
    # Python's % takes the sign of the divisor, so negatives map into [0, 360).
    rotate %= 360
    if rotate % 90 != 0:
        return 0
    return rotate
def JM_object_to_buffer(what, compress, ascii):
    '''
    Print PDF object `what` into a new buffer and return the buffer.

    `compress` and `ascii` are forwarded to `mupdf.pdf_print_obj()`. The
    output is closed and the buffer terminated before it is returned.
    '''
    target = mupdf.fz_new_buffer(512)
    writer = mupdf.FzOutput(target)
    mupdf.pdf_print_obj(writer, what, compress, ascii)
    writer.fz_close_output()
    mupdf.fz_terminate_buffer(target)
    return target
def JM_outline_xrefs(obj, xrefs):
    '''
    Return list of outline xref numbers. Recursive function. Arguments:
    'obj' first OL item
    'xrefs' empty Python list

    The list passed as 'xrefs' is extended in place and also returned.
    '''
    if not obj.m_internal:
        return xrefs
    thisobj = obj
    while thisobj.m_internal:
        newxref = mupdf.pdf_to_num( thisobj)
        # Stop on an xref we have already collected, or on an object that
        # carries a /Type entry.
        if newxref in xrefs or mupdf.pdf_dict_get( thisobj, PDF_NAME('Type')).m_internal:
            # circular ref or top of chain: terminate
            break
        xrefs.append( newxref)
        first = mupdf.pdf_dict_get( thisobj, PDF_NAME('First'))    # try go down
        if mupdf.pdf_is_dict( first):
            xrefs = JM_outline_xrefs( first, xrefs)
        thisobj = mupdf.pdf_dict_get( thisobj, PDF_NAME('Next'))   # try go next
        # NOTE(review): /Parent is read from the object returned for /Next,
        # i.e. after 'thisobj' was reassigned - confirm this is intended.
        parent = mupdf.pdf_dict_get( thisobj, PDF_NAME('Parent'))  # get parent
        if not mupdf.pdf_is_dict( thisobj):
            thisobj = parent
    return xrefs
def JM_page_rotation(page):
    '''
    Return a PDF page's /Rotate value, normalized by JM_norm_rotation():
    one of (0, 90, 180, 270).

    /Rotate is looked up as an inheritable key of the page object.
    '''
    rotate_obj = mupdf.pdf_dict_get_inheritable( page.obj(), mupdf.PDF_ENUM_NAME_Rotate)
    return JM_norm_rotation(mupdf.pdf_to_int(rotate_obj))
def JM_pdf_obj_from_str(doc, src):
    '''
    Create a PDF object by parsing the string `src` (new in v1.14.0: MuPDF
    dropped it).

    The string is UTF-8 encoded, copied into a buffer, opened as a stream
    and handed to `mupdf.pdf_parse_stm_obj()`.
    '''
    # fixme: converting to bytes and then copying again inside
    # fz_new_buffer_from_copied_data() looks inefficient, but there seems
    # to be no other way.
    encoded = bytes(src, 'utf8')
    source_stream = mupdf.fz_open_buffer(mupdf.fz_new_buffer_from_copied_data(encoded))
    lexbuf = mupdf.PdfLexbuf(mupdf.PDF_LEXBUF_SMALL)
    return mupdf.pdf_parse_stm_obj(doc, source_stream, lexbuf)
def JM_pixmap_from_display_list(
        list_,
        ctm,
        cs,
        alpha,
        clip,
        seps,
        ):
    '''
    Version of fz_new_pixmap_from_display_list (util.c) that also supports
    rendering only the 'clip' part of the display list rectangle.

    list_: a mupdf.FzDisplayList.
    ctm: matrix-like, converted with JM_matrix_from_py().
    cs: a mupdf.FzColorspace.
    alpha: truthy to clear the pixmap to transparent, else it is cleared
        with value 0xFF.
    clip: rect-like, converted with JM_rect_from_py().
    seps: a mupdf.FzSeparations or None (replaced by a default instance).

    Returns a Pixmap created with the 'raw' constructor.
    '''
    assert isinstance(list_, mupdf.FzDisplayList)
    if seps is None:
        seps = mupdf.FzSeparations()
    assert seps is None or isinstance(seps, mupdf.FzSeparations), f'{type(seps)=}: {seps}'

    bounds = mupdf.fz_bound_display_list(list_)
    matrix = JM_matrix_from_py(ctm)
    rclip = JM_rect_from_py(clip)
    # Intersecting is a no-op if clip is not given.
    visible = mupdf.fz_transform_rect(mupdf.fz_intersect_rect(bounds, rclip), matrix)
    irect = mupdf.fz_round_rect(visible)

    assert isinstance( cs, mupdf.FzColorspace)
    pix = mupdf.fz_new_pixmap_with_bbox(cs, irect, seps, alpha)
    if alpha:
        mupdf.fz_clear_pixmap(pix)
    else:
        mupdf.fz_clear_pixmap_with_value(pix, 0xFF)

    if mupdf.fz_is_infinite_rect(rclip):
        dev = mupdf.fz_new_draw_device(matrix, pix)
        run_area = mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE)
    else:
        dev = mupdf.fz_new_draw_device_with_bbox(matrix, pix, irect)
        run_area = rclip
    mupdf.fz_run_display_list(list_, dev, mupdf.FzMatrix(), run_area, mupdf.FzCookie())

    mupdf.fz_close_device(dev)
    # Use special raw Pixmap constructor so we don't set alpha to true.
    return Pixmap( 'raw', pix)
def JM_point_from_py(p):
    '''
    Convert a Python object to a mupdf.FzPoint.

    A mupdf.FzPoint is returned unchanged and a Point is converted
    directly. Otherwise, if `g_use_extra` is set, the conversion is
    delegated to `extra.JM_point_from_py()`. Otherwise items 0 and 1 are
    read with JM_FLOAT_ITEM() and clamped to the range
    FZ_MIN_INF_RECT .. FZ_MAX_INF_RECT. If either item is None the
    default point (0, 0) is returned.
    '''
    if isinstance(p, mupdf.FzPoint):
        return p
    if isinstance(p, Point):
        return mupdf.FzPoint(p.x, p.y)
    if g_use_extra:
        return extra.JM_point_from_py( p)

    x = JM_FLOAT_ITEM(p, 0)
    y = JM_FLOAT_ITEM(p, 1)
    if x is None or y is None:
        return mupdf.FzPoint(0, 0)
    x = min( max( x, FZ_MIN_INF_RECT), FZ_MAX_INF_RECT)
    y = min( max( y, FZ_MIN_INF_RECT), FZ_MAX_INF_RECT)
    return mupdf.FzPoint(x, y)
def JM_print_stext_page_as_text(res, page):
    '''
    Plain text output, modelled on fz_print_stext_page_as_text.

    If `g_use_extra` is set, the work is delegated to
    `extra.JM_print_stext_page_as_text()`.

    Otherwise, for every line of every text block, each character whose
    bbox overlaps the page mediabox (or every character if the mediabox is
    infinite) is appended to `res`. After a line, a new-line is appended
    unless nothing was written for it or its last character already was a
    new-line.

    res: mupdf.FzBuffer receiving the text.
    page: mupdf.FzStextPage to read.
    '''
    if g_use_extra:
        return extra.JM_print_stext_page_as_text(res, page)

    assert isinstance(res, mupdf.FzBuffer)
    assert isinstance(page, mupdf.FzStextPage)
    rect = mupdf.FzRect(page.m_internal.mediabox)
    # The mediabox does not change while we iterate, so test it only once.
    rect_is_infinite = mupdf.fz_is_infinite_rect(rect)

    # Note: a former pre-pass that only counted blocks / lines / chars has
    # been removed. Its results were never used, and it raised
    # UnboundLocalError for empty pages, blocks or lines.
    for block in page:
        if block.m_internal.type != mupdf.FZ_STEXT_BLOCK_TEXT:
            continue
        for line in block:
            last_char = 0
            for ch in line:
                if (rect_is_infinite
                        or JM_rects_overlap(rect, JM_char_bbox(line, ch))
                        ):
                    last_char = ch.m_internal.c
                    JM_append_rune(res, last_char)
            if last_char != 10 and last_char > 0:
                mupdf.fz_append_string(res, "\n")
def JM_put_script(annot_obj, key1, key2, value):
    '''
    Create a JavaScript PDF action.

    Usable for all object types which support PDF actions, even if the
    argument name suggests annotations. Up to 2 key values can be specified, so
    JavaScript actions can be stored for '/A' and '/AA/?' keys.

    annot_obj: the PDF object to update.
    key1: first-level key (PDF name object).
    key2: optional second-level key; None or an empty PdfObj means that
        the action is stored directly under `key1`.
    value: the script source; a false value deletes the existing entry.
    '''
    key1_obj = mupdf.pdf_dict_get(annot_obj, key1)
    pdf = mupdf.pdf_get_bound_document(annot_obj)  # owning PDF

    # Treat None like an empty PdfObj everywhere. Previously only the
    # deletion branch accepted None; the other branches raised
    # AttributeError on `key2.m_internal`.
    has_key2 = key2 is not None and key2.m_internal

    # if no new script given, just delete corresponding key
    if not value:
        if not has_key2:
            mupdf.pdf_dict_del(annot_obj, key1)
        elif key1_obj.m_internal:
            mupdf.pdf_dict_del(key1_obj, key2)
        return

    # read any existing script as a Python string
    if not has_key2 or not key1_obj.m_internal:
        script = JM_get_script(key1_obj)
    else:
        script = JM_get_script(mupdf.pdf_dict_get(key1_obj, key2))

    # replace old script, if different from new one
    if value != script:
        newaction = JM_new_javascript(pdf, value)
        if not has_key2:
            mupdf.pdf_dict_put(annot_obj, key1, newaction)
        else:
            mupdf.pdf_dict_putl(annot_obj, newaction, key1, key2)
def JM_py_from_irect(r):
    '''
    Return the tuple (x0, y0, x1, y1) of an irect-like object.
    '''
    return tuple(getattr(r, name) for name in ('x0', 'y0', 'x1', 'y1'))
def JM_py_from_matrix(m):
    '''
    Return the tuple (a, b, c, d, e, f) of a matrix-like object.
    '''
    return tuple(getattr(m, name) for name in ('a', 'b', 'c', 'd', 'e', 'f'))
def JM_py_from_point(p):
    '''
    Return the tuple (x, y) of a point-like object.
    '''
    x, y = p.x, p.y
    return (x, y)
def JM_py_from_quad(q):
    '''
    Return a quad-like object as a tuple of four (x, y) tuples, in the
    order ul, ur, ll, lr.
    '''
    corners = (q.ul, q.ur, q.ll, q.lr)
    return tuple((corner.x, corner.y) for corner in corners)
def JM_py_from_rect(r):
    '''
    Return the tuple (x0, y0, x1, y1) of a rect-like object.
    '''
    return tuple(getattr(r, name) for name in ('x0', 'y0', 'x1', 'y1'))
def JM_quad_from_py(r):
    '''
    Convert a Python object to a mupdf.FzQuad.

    Accepted inputs:
    * mupdf.FzQuad: returned unchanged.
    * mupdf.FzRect or a 4-item sequence of numbers: converted as rectangle.
    * Quad: converted corner by corner.
    * tuple / list of four (x, y) tuples / lists in the order ul, ur, ll,
      lr. Coordinates are clamped to FZ_MIN_INF_RECT .. FZ_MAX_INF_RECT.

    Anything else yields the all-zero quad, except for a 4-item tuple /
    list whose first item is not itself a tuple / list: this is handed to
    JM_rect_from_py().
    '''
    if isinstance(r, mupdf.FzQuad):
        return r
    # cover all cases of 4-float-sequences
    if hasattr(r, "__getitem__") and len(r) == 4 and hasattr(r[0], "__float__"):
        r = mupdf.FzRect(*tuple(r))
    if isinstance( r, mupdf.FzRect):
        return mupdf.fz_quad_from_rect( r)
    if isinstance( r, Quad):
        return mupdf.fz_make_quad(
                r.ul.x, r.ul.y,
                r.ur.x, r.ur.y,
                r.ll.x, r.ll.y,
                r.lr.x, r.lr.y,
                )
    q = mupdf.fz_make_quad(0, 0, 0, 0, 0, 0, 0, 0)
    if not r or not isinstance(r, (tuple, list)) or len(r) != 4:
        return q

    if not PySequence_Check(r[0]):
        # The first item is not a point-like pair: treat r as a rectangle.
        # (The former test `JM_FLOAT_ITEM(r, 0) is None` was inverted and
        # sent sequences of points down this path.)
        return mupdf.fz_quad_from_rect(JM_rect_from_py(r))

    # Collect ul.x, ul.y, ur.x, ur.y, ll.x, ll.y, lr.x, lr.y. The former
    # code tried to set `.x` / `.y` on plain ints, which cannot work.
    coords = []
    for point in r:
        if not PySequence_Check(point) or PySequence_Size(point) != 2:
            return q    # invalid: cancel the rest
        for k in range(2):
            value = JM_FLOAT_ITEM(point, k)
            if value is None:
                return q
            coords.append(min(max(value, FZ_MIN_INF_RECT), FZ_MAX_INF_RECT))
    return mupdf.fz_make_quad(*coords)
def JM_read_contents(pageref):
    '''
    Read a PDF page's /Contents and return it as one buffer.

    For an array, the streams among its items are concatenated; a space
    byte is written before every item except the first. A single object
    is loaded as a stream. If /Contents is missing, an empty buffer is
    returned.
    '''
    assert isinstance(pageref, mupdf.PdfObj), f'{type(pageref)}'
    contents = mupdf.pdf_dict_get(pageref, mupdf.PDF_ENUM_NAME_Contents)

    if not mupdf.pdf_is_array(contents):
        if contents.m_internal:
            return mupdf.pdf_load_stream(contents)
        return mupdf.FzBuffer(0)

    joined = mupdf.FzBuffer(1024)
    for index in range(mupdf.pdf_array_len(contents)):
        if index > 0:
            mupdf.fz_append_byte(joined, 32)
        item = mupdf.pdf_array_get(contents, index)
        if mupdf.pdf_is_stream(item):
            mupdf.fz_append_buffer(joined, mupdf.pdf_load_stream(item))
    return joined
def JM_rect_from_py(r):
    '''
    Convert a Python object to a mupdf.FzRect.

    mupdf.FzRect is returned unchanged; mupdf.FzIrect, Rect and IRect are
    converted directly. A tuple / list of 4 items is read with
    JM_FLOAT_ITEM() and each value is clamped to the range
    FZ_MIN_INF_RECT .. FZ_MAX_INF_RECT. Everything else - including a
    sequence with an item for which JM_FLOAT_ITEM() returns None - yields
    the infinite rectangle.
    '''
    if isinstance(r, mupdf.FzRect):
        return r
    if isinstance(r, mupdf.FzIrect):
        return mupdf.FzRect(r)
    if isinstance(r, Rect):
        return mupdf.fz_make_rect(r.x0, r.y0, r.x1, r.y1)
    if isinstance(r, IRect):
        return mupdf.fz_make_rect(r.x0, r.y0, r.x1, r.y1)
    if not r or not PySequence_Check(r) or PySequence_Size(r) != 4:
        return mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE)

    coords = []
    for index in range(4):
        value = JM_FLOAT_ITEM(r, index)
        if value is None:
            return mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE)
        if value < FZ_MIN_INF_RECT:
            value = FZ_MIN_INF_RECT
        if value > FZ_MAX_INF_RECT:
            value = FZ_MAX_INF_RECT
        coords.append(value)
    x0, y0, x1, y1 = coords
    return mupdf.fz_make_rect(x0, y0, x1, y1)
def JM_rects_overlap(a, b):
    '''
    Return 1 if rectangles `a` and `b` overlap, else 0.

    Rectangles that merely touch along an edge do not count as overlapping.
    '''
    apart = (
            a.x0 >= b.x1
            or a.y0 >= b.y1
            or a.x1 <= b.x0
            or a.y1 <= b.y0
            )
    return 0 if apart else 1
def JM_refresh_links( page):
    '''
    Reload the links of a PDF page from its /Annots entry.

    Nothing happens if `page` is None or empty, or if the page object has
    no /Annots. Otherwise the link annotations are loaded with
    `mupdf.pdf_load_link_annots()` and stored in `page.m_internal.links`.
    '''
    if page is None or not page.m_internal:
        return
    annots = mupdf.pdf_dict_get( page.obj(), PDF_NAME('Annots'))
    if not annots.m_internal:
        return
    pdf = page.doc()
    page_number = mupdf.pdf_lookup_page_number( pdf, page.obj())
    # Both objects are passed to pdf_page_transform(); only the matrix is
    # used afterwards.
    page_mediabox = mupdf.FzRect()
    page_ctm = mupdf.FzMatrix()
    mupdf.pdf_page_transform( page, page_mediabox, page_ctm)
    links = mupdf.pdf_load_link_annots( pdf, page, annots, page_number, page_ctm)
    page.m_internal.links = mupdf.ll_fz_keep_link( links.m_internal)
def JM_rotate_page_matrix(page):
    '''
    Calculate the rotation matrix of a page.

    Returns a default mupdf.FzMatrix() if `page` is empty or its rotation
    is 0. Otherwise the matrix is built from the rotation and the
    width / height given by JM_cropbox_size().
    '''
    if not page.m_internal:
        return mupdf.FzMatrix()  # no valid pdf page given
    rotation = JM_page_rotation(page)
    if rotation == 0:
        return mupdf.FzMatrix()  # no rotation

    size = JM_cropbox_size(page.obj())
    width = size.x
    height = size.y
    if rotation == 90:
        return mupdf.fz_make_matrix(0, 1, -1, 0, height, 0)
    if rotation == 180:
        return mupdf.fz_make_matrix(-1, 0, 0, -1, width, height)
    # any other value (i.e. 270 after normalization)
    return mupdf.fz_make_matrix(0, -1, 1, 0, 0, width)
def JM_search_stext_page(page, needle):
    '''
    Search the text of an stext page for `needle`.

    If `g_use_extra` is set, the work is delegated to
    `extra.JM_search_stext_page()`. Otherwise returns None for an empty
    `needle`, else the list `quads`, which is handed to
    on_highlight_char() via `hits.quads`.

    The page text is first flattened with JM_new_buffer_from_stext_page().
    Then the characters of the page are walked in the same order, with
    `haystack` being the index of the current character in the flattened
    string. The `while 1` loop emulates the `goto`-based C original; the
    former labels are kept as comments.
    '''
    if g_use_extra:
        return extra.JM_search_stext_page(page.m_internal, needle)
    rect = mupdf.FzRect(page.m_internal.mediabox)
    if not needle:
        return
    quads = []
    # Simple attribute container passed to on_highlight_char().
    class Hits:
        def __str__(self):
            return f'Hits(len={self.len} quads={self.quads} hfuzz={self.hfuzz} vfuzz={self.vfuzz}'
    hits = Hits()
    hits.len = 0
    hits.quads = quads
    hits.hfuzz = 0.2    # merge kerns but not large gaps
    hits.vfuzz = 0.1
    buffer_ = JM_new_buffer_from_stext_page(page)
    haystack_string = mupdf.fz_string_from_buffer(buffer_)
    haystack = 0
    # find_string() yields (begin, end) relative to the slice it is given;
    # begin is None if there is no (further) match.
    begin, end = find_string(haystack_string[haystack:], needle)
    if begin is None:
        #goto no_more_matches;
        return quads
    begin += haystack
    end += haystack
    inside = 0  # 1 while the current position lies within a match
    i = 0   # counts visited characters; not used otherwise
    for block in page:
        if block.m_internal.type != mupdf.FZ_STEXT_BLOCK_TEXT:
            continue
        for line in block:
            for ch in line:
                i += 1
                # Characters outside the mediabox were not put into the
                # flattened text, so they must not advance 'haystack'.
                if not mupdf.fz_is_infinite_rect(rect):
                    r = JM_char_bbox(line, ch)
                    if not JM_rects_overlap(rect, r):
                        #goto next_char;
                        continue
                while 1:
                    #try_new_match:
                    if not inside:
                        if haystack >= begin:
                            inside = 1
                    if inside:
                        if haystack < end:
                            on_highlight_char(hits, line, ch)
                            break
                        else:
                            # Current match is exhausted: look for the next
                            # one starting at the current position.
                            inside = 0
                            begin, end = find_string(haystack_string[haystack:], needle)
                            if begin is None:
                                #goto no_more_matches;
                                return quads
                            else:
                                #goto try_new_match;
                                begin += haystack
                                end += haystack
                                continue
                    break
                haystack += 1
                #next_char:;
            # Skip the newline that ends every line in the flattened text.
            assert haystack_string[haystack] == '\n', \
                    f'{haystack=} {haystack_string[haystack]=}'
            haystack += 1
        # Skip the additional newline that ends every text block.
        assert haystack_string[haystack] == '\n', \
                f'{haystack=} {haystack_string[haystack]=}'
        haystack += 1
    #no_more_matches:;
    return quads
def JM_scan_resources(pdf, rsrc, liste, what, stream_xref, tracer):
    '''
    Step through /Resources, looking up image, xobject or font information.

    pdf: the PDF document.
    rsrc: the /Resources object to scan.
    liste: passed to the JM_gather_*() helpers.
    what: 1 = fonts, 2 = images, 3 = form xobjects; anything else returns
        immediately.
    stream_xref: passed to the JM_gather_*() helpers.
    tracer: list of xobject xrefs already recursed into; used to detect
        circular dependencies.

    `rsrc` is marked while it is being scanned and unmarked afterwards.
    '''
    if mupdf.pdf_mark_obj(rsrc):
        mupdf.fz_warn('Circular dependencies! Consider page cleaning.')
        return  # Circular dependencies!
    try:
        xobjects = mupdf.pdf_dict_get(rsrc, mupdf.PDF_ENUM_NAME_XObject)

        if what == 1:   # lookup fonts
            fonts = mupdf.pdf_dict_get(rsrc, mupdf.PDF_ENUM_NAME_Font)
            JM_gather_fonts(pdf, fonts, liste, stream_xref)
        elif what == 2: # look up images
            JM_gather_images(pdf, xobjects, liste, stream_xref)
        elif what == 3: # look up form xobjects
            JM_gather_forms(pdf, xobjects, liste, stream_xref)
        else:   # should never happen
            return

        # check if we need to recurse into Form XObjects
        for index in range(mupdf.pdf_dict_len(xobjects)):
            entry = mupdf.pdf_dict_get_val(xobjects, index)
            sub_xref = mupdf.pdf_to_num(entry) if mupdf.pdf_is_stream(entry) else 0
            sub_resources = mupdf.pdf_dict_get(entry, mupdf.PDF_ENUM_NAME_Resources)
            if not sub_resources.m_internal:
                continue
            if sub_xref in tracer:
                mupdf.fz_warn('Circular dependencies! Consider page cleaning.')
                return
            tracer.append(sub_xref)
            JM_scan_resources( pdf, sub_resources, liste, what, sub_xref, tracer)
    finally:
        mupdf.pdf_unmark_obj(rsrc)
def JM_set_choice_options(annot, liste):
    '''
    Set ListBox / ComboBox values.

    annot: the widget annotation whose /Opt array is written.
    liste: tuple / list of options. Every item is either a string or a
        tuple / list of two non-empty values, which are stored as a
        two-item sub-array. A false value for `liste` is a no-op.
    '''
    if not liste:
        return
    assert isinstance( liste, (tuple, list))
    annot_obj = mupdf.pdf_annot_obj( annot)
    pdf = mupdf.pdf_get_bound_document( annot_obj)
    options = mupdf.pdf_new_array( pdf, len( liste))
    for entry in liste:
        if isinstance(entry, str):
            mupdf.pdf_array_push_text_string( options, entry)
            continue
        assert isinstance( entry, (tuple, list)) and len( entry) == 2, 'bad choice field list'
        first, second = entry
        assert first and second, 'bad choice field list'
        pair = mupdf.pdf_array_push_array( options, 2)
        mupdf.pdf_array_push_text_string( pair, first)
        mupdf.pdf_array_push_text_string( pair, second)
    mupdf.pdf_dict_put( annot_obj, PDF_NAME('Opt'), options)
def JM_set_field_type(doc, obj, type):
    '''
    Set the field type.

    Writes /FT according to the widget type and adjusts the bits in /Ff
    that distinguish push buttons, radio buttons, check boxes, list boxes
    and combo boxes. An unknown `type` changes nothing.

    doc: not used here.
    obj: the field's PDF object.
    type: one of the mupdf.PDF_WIDGET_TYPE_* values.
    '''
    pushbutton = mupdf.PDF_BTN_FIELD_IS_PUSHBUTTON
    radio = mupdf.PDF_BTN_FIELD_IS_RADIO
    combo = mupdf.PDF_CH_FIELD_IS_COMBO
    # (widget type, /FT name, bits to set, bits to clear)
    table = (
            (mupdf.PDF_WIDGET_TYPE_BUTTON, 'Btn', pushbutton, 0),
            (mupdf.PDF_WIDGET_TYPE_RADIOBUTTON, 'Btn', radio, pushbutton),
            (mupdf.PDF_WIDGET_TYPE_CHECKBOX, 'Btn', 0, pushbutton | radio),
            (mupdf.PDF_WIDGET_TYPE_TEXT, 'Tx', 0, 0),
            (mupdf.PDF_WIDGET_TYPE_LISTBOX, 'Ch', 0, combo),
            (mupdf.PDF_WIDGET_TYPE_COMBOBOX, 'Ch', combo, 0),
            (mupdf.PDF_WIDGET_TYPE_SIGNATURE, 'Sig', 0, 0),
            )
    typename = None
    setbits = 0
    clearbits = 0
    for widget_type, ft_name, bits_on, bits_off in table:
        if type == widget_type:
            typename = PDF_NAME(ft_name)
            setbits = bits_on
            clearbits = bits_off
            break

    if typename is not None and typename.m_internal:
        mupdf.pdf_dict_put(obj, PDF_NAME('FT'), typename)

    if setbits != 0 or clearbits != 0:
        flags = mupdf.pdf_dict_get_int(obj, PDF_NAME('Ff'))
        flags = (flags & ~clearbits) | setbits
        mupdf.pdf_dict_put_int(obj, PDF_NAME('Ff'), flags)
def JM_set_object_value(obj, key, value):
    '''
    Set a PDF dict key to some value.

    obj: the PDF dictionary object.
    key: the key as a path, with sub-keys separated by '/'.
    value: PDF source text of the new value.

    The key is first set to a marker string inside `obj`. Then the object
    is printed to text, the marker is replaced by `value`, and the text is
    parsed again.

    Returns the new PDF object parsed from the modified text.

    Raises Exception if a new key would have to be created below an
    indirect object, or if the marker could not be inserted.
    '''
    eyecatcher = "fitz: replace me!"
    pdf = mupdf.pdf_get_bound_document(obj)

    # split PDF key at path seps and take last key part
    list_ = key.split('/')
    skey = list_.pop()

    testkey = mupdf.pdf_dict_getp(obj, key)    # check if key already exists
    if not testkey.m_internal:
        # No, it will be created here. But we cannot allow this happening if
        # indirect objects are referenced. So we check all higher level
        # sub-paths for indirect references.
        while list_:
            t = '/'.join(list_) # next high level
            if mupdf.pdf_is_indirect(mupdf.pdf_dict_getp(obj, JM_StrAsChar(t))):
                # Interpolate the key into the message. It used to be passed
                # as a second Exception argument and so was never formatted.
                raise Exception("path to '%s' has indirects" % JM_StrAsChar(skey))
            list_.pop() # del last sub-key

    # Insert our eyecatcher. Will create all sub-paths in the chain, or
    # respectively remove old value of key-path.
    mupdf.pdf_dict_putp(obj, key, mupdf.pdf_new_text_string(eyecatcher))
    testkey = mupdf.pdf_dict_getp(obj, key)
    if not mupdf.pdf_is_string(testkey):
        raise Exception("cannot insert value for '%s'" % key)
    temp = mupdf.pdf_to_text_string(testkey)
    if temp != eyecatcher:
        raise Exception("cannot insert value for '%s'" % key)

    # read the result as a string
    res = JM_object_to_buffer(obj, 1, 0)
    objstr = JM_EscapeStrFromBuffer(res)

    # replace 'eyecatcher' by desired 'value'
    nullval = "/%s(%s)" % ( skey, eyecatcher)
    newval = "/%s %s" % (skey, value)
    newstr = objstr.replace(nullval, newval, 1)

    # make PDF object from resulting string
    return JM_pdf_obj_from_str(pdf, newstr)
def JM_set_ocg_arrays(conf, basestate, on, off, rbgroups, locked):
    '''
    Update the entries of an optional content configuration dict.

    conf: the PDF dictionary to update.
    basestate: if true, written as name under /BaseState.
    on, off, locked: None leaves /ON, /OFF resp. /Locked untouched. Any
        other value deletes the entry and, if non-empty, recreates it
        from the given xref numbers.
    rbgroups: like the above for /RBGroups, but given as a sequence of
        xref sequences, each stored as a sub-array.
    '''
    if basestate:
        mupdf.pdf_dict_put_name( conf, PDF_NAME('BaseState'), basestate)

    for key, xrefs in (('ON', on), ('OFF', off), ('Locked', locked)):
        if xrefs is None:
            continue
        mupdf.pdf_dict_del( conf, PDF_NAME(key))
        if xrefs:
            target = mupdf.pdf_dict_put_array( conf, PDF_NAME(key), 1)
            JM_set_ocg_arrays_imp( target, xrefs)

    if rbgroups is None:
        return
    mupdf.pdf_dict_del( conf, PDF_NAME('RBGroups'))
    if rbgroups:
        groups = mupdf.pdf_dict_put_array( conf, PDF_NAME('RBGroups'), 1)
        for group in rbgroups:
            sub_array = mupdf.pdf_array_push_array( groups, 1)
            JM_set_ocg_arrays_imp( sub_array, group)
def JM_set_ocg_arrays_imp(arr, list_):
    '''
    Append an indirect reference to PDF array `arr` for every xref number
    in `list_`.

    The references are created in the document that `arr` is bound to.
    '''
    pdf = mupdf.pdf_get_bound_document(arr)
    for xref in list_:
        mupdf.pdf_array_push(arr, mupdf.pdf_new_indirect(pdf, xref, 0))
def JM_set_resource_property(ref, name, xref):
    '''
    Insert an item into Resources/Properties (used for Marked Content).

    ref: object owning the resources, e.g. page object, Form XObject.
    name: marked content name.
    xref: xref of the referenced object (inserted as indirect reference).

    /Resources and /Resources/Properties are created if missing.
    '''
    pdf = mupdf.pdf_get_bound_document(ref)
    indirect = mupdf.pdf_new_indirect(pdf, xref, 0)
    if not indirect.m_internal:
        RAISEPY(MSG_BAD_XREF, PyExc_ValueError)

    res_dict = mupdf.pdf_dict_get(ref, PDF_NAME('Resources'))
    if not res_dict.m_internal:
        res_dict = mupdf.pdf_dict_put_dict(ref, PDF_NAME('Resources'), 1)

    prop_dict = mupdf.pdf_dict_get(res_dict, PDF_NAME('Properties'))
    if not prop_dict.m_internal:
        prop_dict = mupdf.pdf_dict_put_dict(res_dict, PDF_NAME('Properties'), 1)

    mupdf.pdf_dict_put(prop_dict, mupdf.pdf_new_name(name), indirect)
def JM_set_widget_properties(annot, Widget):
    '''
    Update the PDF form field with the properties from a Python Widget object.
    Called by "Page.add_widget" and "Annot.update_widget".

    annot: an Annot or a mupdf.PdfAnnot; must be bound to a page.
    Widget: object providing the widget properties as attributes. Missing
        attributes are read as None.

    The properties are written in a fixed order; the annotation is marked
    dirty and updated at the end.
    '''
    if isinstance( annot, Annot):
        annot = annot.this
    assert isinstance( annot, mupdf.PdfAnnot), f'{type(annot)=} {type=}'
    page = _pdf_annot_page(annot)
    assert page.m_internal, 'Annot is not bound to a page'
    annot_obj = mupdf.pdf_annot_obj(annot)
    pdf = page.doc()
    def GETATTR(name):
        # Read a Widget attribute; None if it does not exist.
        return getattr(Widget, name, None)

    value = GETATTR("field_type")
    field_type = value

    # rectangle --------------------------------------------------------------
    value = GETATTR("rect")
    rect = JM_rect_from_py(value)
    # The rectangle is transformed with the page's rotation matrix before
    # it is stored.
    rot_mat = JM_rotate_page_matrix(page)
    rect = mupdf.fz_transform_rect(rect, rot_mat)
    mupdf.pdf_set_annot_rect(annot, rect)

    # fill color -------------------------------------------------------------
    value = GETATTR("fill_color")
    if value and PySequence_Check(value):
        n = len(value)
        fill_col = mupdf.pdf_new_array(pdf, n)
        col = 0
        for i in range(n):
            col = value[i]
            mupdf.pdf_array_push_real(fill_col, col)
        mupdf.pdf_field_set_fill_color(annot_obj, fill_col)

    # dashes -----------------------------------------------------------------
    value = GETATTR("border_dashes")
    if value and PySequence_Check(value):
        n = len(value)
        dashes = mupdf.pdf_new_array(pdf, n)
        for i in range(n):
            mupdf.pdf_array_push_int(dashes, value[i])
        mupdf.pdf_dict_putl(annot_obj, dashes, PDF_NAME('BS'), PDF_NAME('D'))

    # border color -----------------------------------------------------------
    value = GETATTR("border_color")
    if value and PySequence_Check(value):
        n = len(value)
        border_col = mupdf.pdf_new_array(pdf, n)
        col = 0
        for i in range(n):
            col = value[i]
            mupdf.pdf_array_push_real(border_col, col)
        mupdf.pdf_dict_putl(annot_obj, border_col, PDF_NAME('MK'), PDF_NAME('BC'))

    # entry ignored - may be used later
    #
    #int text_format = (int) PyInt_AsLong(GETATTR("text_format"));
    #

    # field label -----------------------------------------------------------
    value = GETATTR("field_label")
    if value is not None:
        label = JM_StrAsChar(value)
        mupdf.pdf_dict_put_text_string(annot_obj, PDF_NAME('TU'), label)

    # field name -------------------------------------------------------------
    value = GETATTR("field_name")
    if value is not None:
        name = JM_StrAsChar(value)
        old_name = mupdf.pdf_load_field_name(annot_obj)
        # /T is only rewritten if the name actually changed.
        if name != old_name:
            mupdf.pdf_dict_put_text_string(annot_obj, PDF_NAME('T'), name)

    # max text len -----------------------------------------------------------
    if field_type == mupdf.PDF_WIDGET_TYPE_TEXT:
        value = GETATTR("text_maxlen")
        text_maxlen = value
        if text_maxlen:
            mupdf.pdf_dict_put_int(annot_obj, PDF_NAME('MaxLen'), text_maxlen)
    value = GETATTR("field_display")
    d = value
    mupdf.pdf_field_set_display(annot_obj, d)

    # choice values ----------------------------------------------------------
    if field_type in (mupdf.PDF_WIDGET_TYPE_LISTBOX, mupdf.PDF_WIDGET_TYPE_COMBOBOX):
        value = GETATTR("choice_values")
        JM_set_choice_options(annot, value)

    # border style -----------------------------------------------------------
    value = GETATTR("border_style")
    val = JM_get_border_style(value)
    mupdf.pdf_dict_putl(annot_obj, val, PDF_NAME('BS'), PDF_NAME('S'))

    # border width -----------------------------------------------------------
    value = GETATTR("border_width")
    border_width = value
    mupdf.pdf_dict_putl(
            annot_obj,
            mupdf.pdf_new_real(border_width),
            PDF_NAME('BS'),
            PDF_NAME('W'),
            )

    # /DA string -------------------------------------------------------------
    value = GETATTR("_text_da")
    da = JM_StrAsChar(value)
    mupdf.pdf_dict_put_text_string(annot_obj, PDF_NAME('DA'), da)
    mupdf.pdf_dict_del(annot_obj, PDF_NAME('DS'))  # not supported by MuPDF
    mupdf.pdf_dict_del(annot_obj, PDF_NAME('RC'))  # not supported by MuPDF

    # field flags ------------------------------------------------------------
    field_flags = GETATTR("field_flags")
    if field_flags is not None:
        # Make sure the flag bit implied by the field type is set.
        if field_type == mupdf.PDF_WIDGET_TYPE_COMBOBOX:
            field_flags |= mupdf.PDF_CH_FIELD_IS_COMBO
        elif field_type == mupdf.PDF_WIDGET_TYPE_RADIOBUTTON:
            field_flags |= mupdf.PDF_BTN_FIELD_IS_RADIO
        elif field_type == mupdf.PDF_WIDGET_TYPE_BUTTON:
            field_flags |= mupdf.PDF_BTN_FIELD_IS_PUSHBUTTON
        mupdf.pdf_dict_put_int( annot_obj, PDF_NAME('Ff'), field_flags)

    # button caption ---------------------------------------------------------
    value = GETATTR("button_caption")
    ca = JM_StrAsChar(value)
    if ca:
        mupdf.pdf_field_set_button_caption(annot_obj, ca)

    # script (/A) -------------------------------------------------------
    value = GETATTR("script")
    # An empty PdfObj as second key stores the action directly under /A.
    JM_put_script(annot_obj, PDF_NAME('A'), mupdf.PdfObj(), value)

    # script (/AA/K) -------------------------------------------------------
    value = GETATTR("script_stroke")
    JM_put_script(annot_obj, PDF_NAME('AA'), PDF_NAME('K'), value)

    # script (/AA/F) -------------------------------------------------------
    value = GETATTR("script_format")
    JM_put_script(annot_obj, PDF_NAME('AA'), PDF_NAME('F'), value)

    # script (/AA/V) -------------------------------------------------------
    value = GETATTR("script_change")
    JM_put_script(annot_obj, PDF_NAME('AA'), PDF_NAME('V'), value)

    # script (/AA/C) -------------------------------------------------------
    value = GETATTR("script_calc")
    JM_put_script(annot_obj, PDF_NAME('AA'), PDF_NAME('C'), value)

    # script (/AA/Bl) -------------------------------------------------------
    value = GETATTR("script_blur")
    JM_put_script(annot_obj, PDF_NAME('AA'), mupdf.pdf_new_name('Bl'), value)

    # script (/AA/Fo) codespell:ignore --------------------------------------
    value = GETATTR("script_focus")
    JM_put_script(annot_obj, PDF_NAME('AA'), mupdf.pdf_new_name('Fo'), value)

    # field value ------------------------------------------------------------
    value = GETATTR("field_value")  # field value
    text = JM_StrAsChar(value)  # convert to text (may fail!)
    if field_type == mupdf.PDF_WIDGET_TYPE_RADIOBUTTON:
        if not value:
            mupdf.pdf_set_field_value(pdf, annot_obj, "Off", 1)
            mupdf.pdf_dict_put_name(annot_obj, PDF_NAME('AS'), "Off")
        else:
            # TODO check if another button in the group is ON and if so set it Off
            onstate = mupdf.pdf_button_field_on_state(annot_obj)
            if onstate.m_internal:
                on = mupdf.pdf_to_name(onstate)
                mupdf.pdf_set_field_value(pdf, annot_obj, on, 1)
                mupdf.pdf_dict_put_name(annot_obj, PDF_NAME('AS'), on)
            elif text:
                mupdf.pdf_dict_put_name(annot_obj, PDF_NAME('AS'), text)
    elif field_type == mupdf.PDF_WIDGET_TYPE_CHECKBOX:
        onstate = mupdf.pdf_button_field_on_state(annot_obj)
        on = onstate.pdf_to_name()
        # Checked if the value is True, equals the on-state name, or the
        # text is 'Yes'.
        if value in (True, on) or text == 'Yes':
            mupdf.pdf_set_field_value(pdf, annot_obj, on, 1)
            mupdf.pdf_dict_put_name(annot_obj, PDF_NAME('AS'), on)
            mupdf.pdf_dict_put_name(annot_obj, PDF_NAME('V'), on)
        else:
            mupdf.pdf_dict_put_name( annot_obj, PDF_NAME('AS'), 'Off')
            mupdf.pdf_dict_put_name( annot_obj, PDF_NAME('V'), 'Off')
    else:
        if text:
            mupdf.pdf_set_field_value(pdf, annot_obj, text, 1)
            if field_type in (mupdf.PDF_WIDGET_TYPE_COMBOBOX, mupdf.PDF_WIDGET_TYPE_LISTBOX):
                mupdf.pdf_dict_del(annot_obj, PDF_NAME('I'))

    mupdf.pdf_dirty_annot(annot)
    mupdf.pdf_set_annot_hot(annot, 1)
    mupdf.pdf_set_annot_active(annot, 1)
    mupdf.pdf_update_annot(annot)
def JM_show_string_cs(
        text,
        user_font,
        trm,
        s,
        wmode,
        bidi_level,
        markup_dir,
        language,
        ):
    '''
    Show the characters of `s` as glyphs in `text`, one after the other.

    Every character is encoded with fz_encode_character_sc() in
    `user_font`. If that yields glyph id 0, a fallback font is looked up.
    After each glyph, the matrix is translated by the glyph advance:
    horizontally if `wmode` is 0, else vertically.

    Returns the matrix after the last glyph.
    '''
    pos = 0
    while pos < len(s):
        consumed, ucs = mupdf.fz_chartorune(s[pos:])
        pos += consumed
        gid = mupdf.fz_encode_character_sc(user_font, ucs)
        if gid != 0:
            font = user_font
        else:
            gid, font = mupdf.fz_encode_character_with_fallback(user_font, ucs, 0, language)
        mupdf.fz_show_glyph(text, font, trm, gid, ucs, wmode, bidi_level, markup_dir, language)
        advance = mupdf.fz_advance_glyph(font, gid, wmode)
        if wmode == 0:
            trm = mupdf.fz_pre_translate(trm, advance, 0)
        else:
            trm = mupdf.fz_pre_translate(trm, 0, -advance)
    return trm
def JM_UnicodeFromBuffer(buff):
    '''
    Return the content of a buffer as a Python string.

    The bytes are decoded with error handler 'replace'. The result is cut
    off at the first NUL character, if there is one.
    '''
    raw = mupdf.fz_buffer_extract_copy(buff)
    decoded = raw.decode(errors='replace')
    head, _sep, _tail = decoded.partition(chr(0))
    return head
def message_warning(text):
    '''
    Generate a warning: pass `text`, prefixed with 'warning: ', to message().
    '''
    warning_text = f'warning: {text}'
    message(warning_text)
def JM_update_stream(doc, obj, buffer_, compress):
    '''
    Update a stream object with the content of `buffer_`.

    If `compress` is true and the buffer is longer than 30 bytes, the
    buffer is compressed with JM_compress_buffer(). The compressed
    version is stored (with /Filter /FlateDecode) only if it is shorter
    than the original. In all other cases the buffer is stored as is.
    '''
    if compress:
        raw_size, _ = mupdf.fz_buffer_storage(buffer_)
        if raw_size > 30:   # ignore small stuff
            deflated = JM_compress_buffer(buffer_)
            assert isinstance(deflated, mupdf.FzBuffer)
            if deflated.m_internal:
                deflated_size, _ = mupdf.fz_buffer_storage(deflated)
                if deflated_size < raw_size:  # was it worth the effort?
                    mupdf.pdf_dict_put(
                            obj,
                            mupdf.PDF_ENUM_NAME_Filter,
                            mupdf.PDF_ENUM_NAME_FlateDecode,
                            )
                    mupdf.pdf_update_stream(doc, obj, deflated, 1)
                    return
    # not compressed: store the buffer unchanged
    mupdf.pdf_update_stream(doc, obj, buffer_, 0)
def JM_xobject_from_page(pdfout, fsrcpage, xref, gmap):
    '''
    Make an XObject from a PDF page.

    For a positive xref assume that its object can be used instead: an
    indirect reference to it in `pdfout` is returned and the source page
    is not touched.

    pdfout: target PDF document.
    fsrcpage: source page; only used if `xref` is not positive.
    xref: xref of an existing XObject, or a value <= 0.
    gmap: a mupdf.PdfGraftMap; used for copying the resources if it is
        not empty.
    '''
    assert isinstance(gmap, mupdf.PdfGraftMap), f'{type(gmap)=}'
    if xref > 0:
        return mupdf.pdf_new_indirect(pdfout, xref, 0)

    source_ref = _as_pdf_page(fsrcpage.this).obj()
    mediabox = mupdf.pdf_to_rect(mupdf.pdf_dict_get_inheritable(source_ref, PDF_NAME('MediaBox')))

    # Deep-copy resources object of source page
    source_resources = mupdf.pdf_dict_get_inheritable(source_ref, PDF_NAME('Resources'))
    if gmap.m_internal:
        # use graftmap when possible
        resources = mupdf.pdf_graft_mapped_object(gmap, source_resources)
    else:
        resources = mupdf.pdf_graft_object(pdfout, source_resources)

    # get source page contents
    contents = JM_read_contents(source_ref)

    # create XObject representing the source page
    xobject = mupdf.pdf_new_xobject(pdfout, mediabox, mupdf.FzMatrix(), mupdf.PdfObj(0), contents)
    # store source page contents
    JM_update_stream(pdfout, xobject, contents, 1)
    # store source page resources
    mupdf.pdf_dict_put(xobject, PDF_NAME('Resources'), resources)
    return xobject
def PySequence_Check(s):
    '''
    Return True if `s` is a tuple or a list (or an instance of a subclass).
    '''
    return isinstance(s, tuple) or isinstance(s, list)
def PySequence_Size(s):
    '''
    Return the number of items in `s`, i.e. len(s).
    '''
    size = len(s)
    return size
# constants: error messages. These are also in extra.i.
#
# The MSG_* values are the message texts. They are used e.g. as the 'msg'
# argument of RAISEPY().
#
MSG_BAD_ANNOT_TYPE = "bad annot type"
MSG_BAD_APN = "bad or missing annot AP/N"
MSG_BAD_ARG_INK_ANNOT = "arg must be seq of seq of float pairs"
MSG_BAD_ARG_POINTS = "bad seq of points"
MSG_BAD_BUFFER = "bad type: 'buffer'"
MSG_BAD_COLOR_SEQ = "bad color sequence"
MSG_BAD_DOCUMENT = "cannot open broken document"
MSG_BAD_FILETYPE = "bad filetype"
MSG_BAD_LOCATION = "bad location"
MSG_BAD_OC_CONFIG = "bad config number"
MSG_BAD_OC_LAYER = "bad layer number"
MSG_BAD_OC_REF = "bad 'oc' reference"
MSG_BAD_PAGEID = "bad page id"
MSG_BAD_PAGENO = "bad page number(s)"
MSG_BAD_PDFROOT = "PDF has no root"
MSG_BAD_RECT = "rect is infinite or empty"
MSG_BAD_TEXT = "bad type: 'text'"
MSG_BAD_XREF = "bad xref"
MSG_COLOR_COUNT_FAILED = "color count failed"
MSG_FILE_OR_BUFFER = "need font file or buffer"
MSG_FONT_FAILED = "cannot create font"
MSG_IS_NO_ANNOT = "is no annotation"
MSG_IS_NO_IMAGE = "is no image"
MSG_IS_NO_PDF = "is no PDF"
MSG_IS_NO_DICT = "object is no PDF dict"
MSG_PIX_NOALPHA = "source pixmap has no alpha"
MSG_PIXEL_OUTSIDE = "pixel(s) outside image"
# Exception identifiers as plain strings. PyExc_ValueError is passed as
# the 'exc' argument of RAISEPY(), which currently ignores it.
JM_Exc_FileDataError = 'FileDataError'
PyExc_ValueError = 'ValueError'
def RAISEPY(msg, exc):
    """Raise a plain `Exception` whose message is `msg`.

    `exc` is currently ignored: the raised type is always `Exception`,
    regardless of what `exc` names.
    """
    #JM_Exc_CurrentException=exc
    #fz_throw(context, FZ_ERROR_GENERIC, msg)
    error = Exception(msg)
    raise error
def PyUnicode_DecodeRawUnicodeEscape(s, errors='strict'):
    """Decode `s` using Python's 'raw_unicode_escape' codec.

    Args:
        s: `str` or bytes-like object (`bytes`, `bytearray`, `memoryview`).
            A `str` is first encoded to UTF-8 and the resulting bytes are
            then decoded. Any falsy value (None, "", b"") yields "".
        errors: error handler name passed to both the encode and the decode
            step.
    Returns:
        The decoded string.
    Raises:
        TypeError: if `s` is truthy but neither `str` nor bytes-like.
            (Previously such input failed with an UnboundLocalError.)
    """
    if not s:
        return ""
    if isinstance(s, str):
        raw = s.encode("utf8", errors=errors)
    elif isinstance(s, (bytes, bytearray, memoryview)):
        # bytes() copies, so the caller's buffer is never touched.
        raw = bytes(s)
    else:
        raise TypeError(
            f"expected str or bytes-like object, got {type(s).__name__}"
        )
    return raw.decode('raw_unicode_escape', errors=errors)
def CheckColor(c: OptSeq):
    """Validate a color specification.

    Falsy values (None, empty sequence) are accepted without checks.
    Anything else must be exactly a `list` or `tuple` holding 1, 3 or 4
    components, none below 0 and none above 1.

    Raises:
        ValueError: if `c` is truthy and violates any of these rules.
    """
    if not c:
        return
    acceptable = (
        type(c) in (list, tuple)
        and len(c) in (1, 3, 4)
        and not min(c) < 0
        and not max(c) > 1
    )
    if not acceptable:
        raise ValueError("need 1, 3 or 4 color components in range 0 to 1")
def CheckFont(page: Page, fontname: str) -> tuple:
    """Look up a font of the page by its reference name.

    Returns the first item of `page.get_fonts()` whose element at index 4
    equals `fontname`, or None if no item matches.
    """
    candidates = (entry for entry in page.get_fonts() if entry[4] == fontname)
    return next(candidates, None)
def CheckFontInfo(doc: Document, xref: int) -> list:
    """Look up a font info entry of the document by xref.

    Returns the first item of `doc.FontInfos` whose element at index 0
    equals `xref`, or None if there is no such item.
    """
    matches = (entry for entry in doc.FontInfos if xref == entry[0])
    return next(matches, None)
def CheckMarkerArg(quads: typing.Any) -> tuple:
    """Normalize the `quads` argument of a text marker annotation.

    * A single rect-like is converted to a 1-tuple holding its quad.
    * A single quad-like is wrapped in a 1-tuple.
    * Otherwise `quads` is treated as an iterable of rect-likes / quad-likes
      and returned unchanged once every entry has passed the check.

    Raises:
        ValueError: if an entry of the iterable is neither rect- nor
            quad-like.
    """
    if CheckRect(quads):
        return (Rect(quads).quad,)
    if CheckQuad(quads):
        return (quads,)
    if not all(CheckRect(entry) or CheckQuad(entry) for entry in quads):
        raise ValueError("bad quads entry")
    return quads
def CheckMorph(o: typing.Any) -> bool:
    """Validate a morph parameter.

    Returns False for any falsy value and True for a valid morph. A valid
    morph is exactly a `list` or `tuple` of two items: the first of length
    2, the second of length 6 with its last two entries both equal to 0.

    Raises:
        ValueError: if `o` is truthy but not of that shape.
    """
    if not bool(o):
        return False
    if type(o) not in (list, tuple) or len(o) != 2:
        raise ValueError("morph must be a sequence of length 2")
    fixpoint, matrix = o[0], o[1]
    if len(fixpoint) != 2 or len(matrix) != 6:
        raise ValueError("invalid morph param 0")
    if not (matrix[4] == matrix[5] == 0):
        raise ValueError("invalid morph param 1")
    return True
def CheckParent(o: typing.Any):
    """Placeholder for an "orphaned object" check - currently a no-op.

    The unconditional `return` below makes the validation code after it
    unreachable, so this function always returns None and never raises.
    """
    return
    # Unreachable while the early return above is in place: would raise if
    # `o` has no `parent` attribute or if that attribute is None.
    if not hasattr(o, "parent") or o.parent is None:
        raise ValueError(f"orphaned object {type(o)=}: parent is None")
def CheckQuad(q: typing.Any) -> bool:
    """Tell whether `q` is quad-like and convex.

    `q` is passed to `Quad()`. If that constructor raises, the result is
    False (the exception is reported via `exception_info()` when
    `g_exceptions_verbose` exceeds 1). Otherwise the quad's `is_convex`
    value is returned.
    """
    try:
        quad = Quad(q)
    except Exception:
        if g_exceptions_verbose > 1:
            exception_info()
        return False
    else:
        return quad.is_convex
def CheckRect(r: typing.Any) -> bool:
    """Tell whether `r` is rect-like and neither empty nor infinite.

    `r` is passed to `Rect()`. If that constructor raises, the result is
    False (the exception is reported via `exception_info()` when
    `g_exceptions_verbose` exceeds 1).
    """
    try:
        rect = Rect(r)
    except Exception:
        if g_exceptions_verbose > 1:
            exception_info()
        return False
    if rect.is_empty:
        return False
    return not rect.is_infinite
def ColorCode(c: typing.Union[list, tuple, float, None], f: str) -> str:
    """Build the color operator string for a color value.

    Args:
        c: a single number or a sequence of 1, 3 or 4 components; validated
            with `CheckColor()`. Falsy values produce "".
        f: "c" selects the upper-case operator ("G", "RG" or "K"), any other
            value the lower-case one ("g", "rg" or "k").
    Returns:
        The components formatted by `_format_g()`, a space, the operator and
        a trailing space. 1 component uses G/g, 3 use RG/rg, otherwise K/k.
    """
    if not c:
        return ""
    if hasattr(c, "__float__"):
        c = (c,)  # a bare number becomes a one-component color
    CheckColor(c)
    count = len(c)
    if count == 1:
        operand = c[0]
        upper, lower = "G ", "g "
    elif count == 3:
        operand = tuple(c)
        upper, lower = "RG ", "rg "
    else:
        operand = tuple(c)
        upper, lower = "K ", "k "
    prefix = _format_g(operand) + " "
    return prefix + (upper if f == "c" else lower)
def Page__add_text_marker(self, quads, annot_type):
    """Create a text marker annotation of type `annot_type` on the page.

    While the annotation is built, a non-zero page rotation is temporarily
    set to 0 in the page's /Rotate entry and restored afterwards - also
    when an exception occurs.

    Args:
        quads: iterable of quad-likes; each is added as a quad point.
        annot_type: annotation type passed to `mupdf.pdf_create_annot()`.
    Returns:
        The new `Annot`, or None if any step raised an exception.
    """
    pdfpage = self._pdf_page()
    rotation = JM_page_rotation(pdfpage)
    rotated = rotation != 0

    def set_rotate(value):
        # Only touches the page object if the page actually is rotated.
        if rotated:
            mupdf.pdf_dict_put_int(pdfpage.obj(), PDF_NAME('Rotate'), value)

    try:
        set_rotate(0)
        annot = mupdf.pdf_create_annot(pdfpage, annot_type)
        for item in quads:
            mupdf.pdf_add_annot_quad_point(annot, JM_quad_from_py(item))
        mupdf.pdf_update_annot(annot)
        JM_add_annot_id(annot, "A")
        set_rotate(rotation)
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        set_rotate(rotation)
        return
    return Annot(annot)
def PDF_NAME(x):
    """Return the attribute `PDF_ENUM_NAME_<x>` of module `mupdf`.

    `x` must be a `str` (asserted). An unknown name raises AttributeError.
    """
    assert isinstance(x, str)
    # Note that we return a (swig proxy for) pdf_obj*, not a mupdf.PdfObj. In
    # the C++ API, the constructor PdfObj::PdfObj(pdf_obj*) is marked as
    # explicit, but this seems to be ignored by SWIG. If SWIG started to
    # generate code that respected `explicit`, we would need to do `return
    # mupdf.PdfObj(ret)`.
    #
    # [Compare with extra.i, where we define our own PDF_NAME2() macro that
    # returns a mupdf::PdfObj.]
    return getattr(mupdf, 'PDF_ENUM_NAME_' + x)
def UpdateFontInfo(doc: Document, info: typing.Sequence):
    """Store a font info entry in `doc.FontInfos`.

    `info[0]` is the key (the xref). The first existing entry whose element
    0 equals it is replaced in place; if there is none, `info` is appended.
    """
    xref = info[0]
    for position, entry in enumerate(doc.FontInfos):
        if entry[0] == xref:
            doc.FontInfos[position] = info
            return
    doc.FontInfos.append(info)
def args_match(args, *types):
    '''
    Returns true if <args> matches <types>.

    Each item in <types> is a type or tuple of types. Any of these types will
    match an item in <args>. `None` will match anything in <args>. `type(None)`
    will match an arg whose value is `None`.

    A tuple that contains `None` marks an optional arg: it may be missing
    from the end of <args>. If the arg is present, it must match one of the
    tuple's other entries. (`None` is removed before calling `isinstance()`,
    which would otherwise raise TypeError for a non-matching arg instead of
    reporting a mismatch.)

    Returns False if <args> has more items than <types>.
    '''
    j = 0   # index of the next item in <args> to be matched.
    for type_ in types:
        optional = isinstance(type_, tuple) and None in type_
        if j >= len(args):
            if optional:
                # arg is missing but has default value.
                continue
            return False
        if type_ is not None:
            if optional:
                # `None` is only the "may be omitted" marker, not a type.
                type_ = tuple(t for t in type_ if t is not None)
            if not isinstance(args[j], type_):
                return False
        j += 1
    return j == len(args)
def calc_image_matrix(width, height, tr, rotate, keep):
    '''
    Compute the image insertion matrix.

    Args:
        width, height: image dimensions; only used if `keep` is true.
        tr: rect-like target rectangle.
        rotate: rotation angle, passed to `mupdf.fz_rotate()`.
        keep: if true, the image's width / height proportion determines the
            size of the inserted image, otherwise the whole target rectangle
            is used.
    Returns:
        The concatenation of: shift by (-0.5, -0.5), rotation, scaling to
        the computed size, translation to the center of `tr`.
    '''
    trect = JM_rect_from_py(tr)
    rot = mupdf.fz_rotate(rotate)
    trw = trect.x1 - trect.x0
    trh = trect.y1 - trect.y0

    # Relative extents of the image: the larger side is 1.
    if keep:
        large = max(width, height)
        fw, fh = width / large, height / large
    else:
        fw = fh = 1
    small = min(fw, fh)

    # Any rotation other than 0 or 180 exchanges the two factors.
    if rotate not in (0, 180):
        fw, fh = fh, fw

    if fw < 1:
        if trw / fw > trh / fh:
            w, h = trh * small, trh
        else:
            w, h = trw, trw / small
    elif fw != fh:
        if trw / fw > trh / fh:
            w, h = trh / small, trh
        else:
            w, h = trw, trw * small
    else:
        w, h = trw, trh

    center = mupdf.fz_make_point(
        (trect.x0 + trect.x1) / 2,
        (trect.y0 + trect.y1) / 2,
    )
    steps = (
        rot,
        mupdf.fz_scale(w, h),
        mupdf.fz_translate(center.x, center.y),
    )
    mat = mupdf.fz_make_matrix(1, 0, 0, 1, -0.5, -0.5)
    for step in steps:
        mat = mupdf.fz_concat(mat, step)
    return mat
def detect_super_script(line, ch):
    """Tell whether character `ch` counts as superscript within `line`.

    Only lines with `wmode` 0 and direction (1, 0) are examined; for all
    others 0 is returned. For those lines the result is True if the
    character's origin y is less than the y of the line's first character
    origin minus 10% of the character's size.
    """
    info = line.m_internal
    if info.wmode != 0:
        return 0
    if info.dir.x != 1 or info.dir.y != 0:
        return 0
    glyph = ch.m_internal
    threshold = info.first_char.origin.y - glyph.size * 0.1
    return glyph.origin.y < threshold
def dir_str(x):
    """Return a multi-line listing of `dir(x)`.

    The first line shows `x`, its type and the number of names; each
    following line is one name, indented by four spaces.
    """
    names = dir(x)
    header = f'{x} {type(x)} ({len(names)}):\n'
    return header + ''.join(f'    {name}\n' for name in names)
def getTJstr(text: str, glyphs: typing.Union[list, tuple, None], simple: bool, ordering: int) -> str:
    """Return `text` as a hex string in "[<...>]" brackets, suitable for the
    PDF TJ operator.

    Notes:
        Text that already starts with "[<" and ends with ">]" is returned
        unchanged; empty text gives "[<>]".
    Args:
        text: the string to convert.
        glyphs: lookup from character code to an item whose first element
            is the glyph number; may be None.
        simple: if true, two hex digits are written per character: the
            character code if `glyphs` is None, else the glyph number.
            Characters with codes above 255 are written as "b7".
        ordering: only used if `simple` is false. Four hex digits are
            written per character: the glyph number if `ordering` < 0, else
            the character code.
    """
    if text.startswith("[<") and text.endswith(">]"):  # already done
        return text
    if not bool(text):
        return "[<>]"

    codes = [ord(char) for char in text]
    if simple:
        if glyphs is None:
            # not Symbol, not ZapfDingbats: use char code
            pieces = ("%02x" % code if code < 256 else "b7" for code in codes)
        else:
            # Symbol or ZapfDingbats: use glyphs
            pieces = ("%02x" % glyphs[code][0] if code < 256 else "b7" for code in codes)
    elif ordering < 0:
        # not a CJK font: use the glyphs
        pieces = ("%04x" % glyphs[code][0] for code in codes)
    else:
        # CJK: use the char codes
        pieces = ("%04x" % code for code in codes)
    return "[<" + "".join(pieces) + ">]"
def get_pdf_str(s: str) -> str:
    """ Return a PDF string depending on its coding.

    Notes:
        * Empty input gives "()".
        * If any character code exceeds 255, "<feff...>" is returned: the
          hex digits of the UTF-16BE encoding of `s`, preceded by the BOM.
        * Otherwise the text is returned in "()" brackets. Printable ASCII
          is copied, with "(", ")" and "\\" escaped by a backslash. Codes
          above 127 become octal escapes \\nnn. Backspace, tab, line feed,
          form feed and carriage return become \\b, \\t, \\n, \\f, \\r.
          Every other code is replaced by \\267 (0xB7).
    """
    if not bool(s):
        return "()"

    if any(ord(char) > 255 for char in s):  # beyond 8-bit code range
        payload = bytearray([254, 255]) + bytearray(s, "UTF-16BE")
        return "<" + payload.hex() + ">"  # brackets indicate hex

    needs_backslash = ("(", ")", "\\")
    control_escapes = {
        8: "\\b",  # backspace
        9: "\\t",  # tab
        10: "\\n",  # line feed
        12: "\\f",  # form feed
        13: "\\r",  # carriage return
    }
    parts = []
    for char in s:
        code = ord(char)
        if 31 < code < 127:  # in ASCII range
            parts.append("\\" + char if char in needs_backslash else char)
        elif code > 127:  # beyond ASCII
            parts.append("\\%03o" % code)
        else:
            # white space, or unsupported: replace by 0xB7
            parts.append(control_escapes.get(code, "\\267"))
    return "(" + "".join(parts) + ")"
def get_tessdata(tessdata=None):
    """Detect Tesseract language support folder.

    This function is used to enable OCR via Tesseract even if the language
    support folder is not specified directly or in environment variable
    TESSDATA_PREFIX.

    * If <tessdata> is set we return it directly.
    * Otherwise we return `os.environ['TESSDATA_PREFIX']` if set.
    * Otherwise we search for a Tesseract installation and return its language
      support folder.
    * Otherwise we raise an exception.

    Args:
        tessdata: folder name, or a falsy value to trigger detection.
    Returns:
        The language support folder as a string.
    Raises:
        RuntimeError: if no folder was given and none could be detected.
    """
    if tessdata:
        return tessdata
    tessdata = os.getenv("TESSDATA_PREFIX")
    if tessdata:  # use environment variable if set
        return tessdata

    # Try to locate the tesseract-ocr installation.
    import subprocess

    def run(command):
        # All commands are fixed strings; a failing command is inspected by
        # the caller via its return code and never raises.
        return subprocess.run(command, shell=True, capture_output=True, check=False, text=True)

    # Tesseract itself names its language folder in this output.
    cp = run('tesseract --list-langs')
    if cp.returncode == 0:
        m = re.search('List of available languages in "(.+)"', cp.stdout)
        if m:
            return m.group(1)

    # Windows systems:
    if sys.platform == "win32":
        cp = run("where tesseract")
        response = cp.stdout.strip()
        if cp.returncode or not response:
            raise RuntimeError("No tessdata specified and Tesseract is not installed")
        # `where` prints one line per match: use the first match only.
        dirname = os.path.dirname(response.splitlines()[0].strip())  # path of tesseract.exe
        tessdata = os.path.join(dirname, "tessdata")  # language support
        if os.path.exists(tessdata):  # all ok?
            return tessdata
        # should not happen!
        raise RuntimeError(f"No tessdata specified and Tesseract installation has no {tessdata} folder")

    # Unix-like systems:
    attempts = []
    for path in 'tesseract-ocr', 'tesseract':
        cp = run(f'whereis {path}')
        if cp.returncode != 0:
            continue
        response = cp.stdout.strip().split()
        if len(response) != 2:
            continue
        # search tessdata in folder structure
        dirname = response[1]  # contains tesseract-ocr installation folder
        pattern = f"{dirname}/*/tessdata"
        attempts.append(pattern)
        tessdatas = sorted(glob.glob(pattern))
        if tessdatas:
            return tessdatas[-1]
    if attempts:
        # One attempted pattern per line.
        lines = ['No tessdata specified and no match for:']
        lines.extend(f'    {attempt}' for attempt in attempts)
        raise RuntimeError('\n'.join(lines))
    raise RuntimeError('No tessdata specified and Tesseract is not installed')
def css_for_pymupdf_font(
    fontcode: str, *, CSS: OptStr = None, archive: AnyType = None, name: OptStr = None
) -> str:
    """Create @font-face items for the given fontcode of pymupdf-fonts.

    Every key of `fitz_fontdescriptors` that starts with `fontcode` is
    selected. For each of them the font buffer is loaded and added to
    `archive` under that key, and one @font-face rule is appended to the
    CSS. All rules share the same font-family name; "font-weight: bold;" and
    "font-style: italic;" are added according to the descriptor's "bold" and
    "italic" entries.

    Note:
        The font naming convention in package pymupdf-fonts is
        "fontcode<sf>", where the suffix "sf" is either empty or one of
        "it", "bo" or "bi" (regular, italic, bold, bold-italic). For
        example, font code "notos" refers to fonts "notos", "notosit",
        "notosbo" and "notosbi".

    Args:
        fontcode: (str) start of the font codes to include.
        CSS: (str) CSS source to append to; None counts as "".
        archive: (Archive, mandatory) receives the font buffers.
        name: (str) font-family name to use instead of `fontcode`.
    Returns:
        The CSS source with the new @font-face rules appended.
    Raises:
        ValueError: if `archive` is not exactly an `Archive`, if no font
            code matches, or if more than 4 font codes match.
    """
    # @font-face template string
    CSSFONT = "\n@font-face {font-family: %s; src: url(%s);%s%s}\n"

    if type(archive) is not Archive:
        raise ValueError("'archive' must be an Archive")
    if CSS is None:
        CSS = ""

    # select font codes starting with the pass-in string
    font_keys = [key for key in fitz_fontdescriptors.keys() if key.startswith(fontcode)]
    if not font_keys:
        raise ValueError(f"No font code '{fontcode}' found in pymupdf-fonts.")
    if len(font_keys) > 4:
        raise ValueError("fontcode too short")
    if name is None:  # use this name for font-family
        name = fontcode

    rules = []
    for fkey in font_keys:
        descriptor = fitz_fontdescriptors[fkey]
        is_bold = descriptor["bold"]
        is_italic = descriptor["italic"]
        archive.add(descriptor["loader"](), fkey)  # load buffer, update archive
        weight = "font-weight: bold;" if is_bold else ""
        slant = "font-style: italic;" if is_italic else ""
        rules.append(CSSFONT % (name, fkey, weight, slant))
    return CSS + "".join(rules)
def get_text_length(text: str, fontname: str ="helv", fontsize: float =11, encoding: int =0) -> float:
    """Calculate length of a string for a built-in font.

    Args:
        text: the string to measure.
        fontname: name of the font (compared in lower case).
        fontsize: font size points.
        encoding: encoding to use, 0=Latin (default), 1=Greek, 2=Cyrillic.
    Returns:
        (float) length of text.
        * Symbol and ZapfDingbats: sum of the widths in their glyph tables
          times `fontsize`; characters with codes above 255 use the width of
          entry 183.
        * Other names in `Base14_fontdict`: result of `util_measure_string()`.
        * The listed CJK names: number of characters times `fontsize`.
    Raises:
        ValueError: for any other font name.
    """
    fontname = fontname.lower()
    basename = Base14_fontdict.get(fontname, None)

    glyphs = None
    if basename == "Symbol":
        glyphs = symbol_glyphs
    elif basename == "ZapfDingbats":
        glyphs = zapf_glyphs

    if glyphs is not None:
        total = sum(
            glyphs[ord(char)][1] if ord(char) < 256 else glyphs[183][1]
            for char in text
        )
        return total * fontsize

    if fontname in Base14_fontdict:
        return util_measure_string(
            text, Base14_fontdict[fontname], fontsize, encoding
        )

    cjk_names = (
        "china-t",
        "china-s",
        "china-ts",
        "china-ss",
        "japan",
        "japan-s",
        "korea",
        "korea-s",
    )
    if fontname in cjk_names:
        return len(text) * fontsize

    raise ValueError("Font '%s' is unsupported" % fontname)
def image_profile(img: ByteString) -> dict:
    """ Return basic properties of an image.

    Args:
        img: bytes, bytearray, io.BytesIO object or an opened image file
            (anything with a `read` attribute).
    Returns:
        The result of `TOOLS.image_profile()` for the image data.
    Raises:
        ValueError: if `img` is none of the supported kinds.
    """
    if type(img) is io.BytesIO:
        data = img.getvalue()
    elif hasattr(img, "read"):
        data = img.read()
    elif type(img) in (bytes, bytearray):
        data = img
    else:
        raise ValueError("bad argument 'img'")
    return TOOLS.image_profile(data)
def jm_append_merge(dev):
    '''
    Append current path to list or merge into last path of the list.

    (1) If `dev.method` is callable or truthy, the attribute of `dev.out`
        named by `dev.method` is called with the path dictionary,
        `dev.pathdict` is set to None and nothing else happens.
    (2) Otherwise a copy of the path is appended - and the path dictionary
        cleared - if this is the first path, if it is not a stroke path
        ("s"), if the previous path is not a fill path ("f"), or if both
        have different item lists.
    (3) Otherwise the keys of the new path that the previous path does not
        have are copied into the previous path, whose type becomes "fs".
    '''
    assert isinstance(dev.out, list)

    if callable(dev.method) or dev.method:  # function or method
        # callback.
        if dev.method is None:
            # fixme, this surely cannot happen?
            assert 0
            #resp = PyObject_CallFunctionObjArgs(out, dev.pathdict, NULL)
        else:
            resp = getattr(dev.out, dev.method)(dev.pathdict)
        if not resp:
            message("calling cdrawings callback function/method failed!")
        dev.pathdict = None
        return

    def store_copy():
        # Append a snapshot of the current path, then empty the working dict.
        dev.out.append(dev.pathdict.copy())
        dev.pathdict.clear()

    assert isinstance(dev.out, list)
    if not dev.out:   # always append first path
        return store_copy()
    if dev.pathdict[dictkey_type] != 's':   # if not stroke, then append
        return store_copy()
    prev = dev.out[-1]   # get prev path
    if prev[dictkey_type] != 'f':   # if previous not fill, append
        return store_copy()
    # last check: there must be the same list of items for "f" and "s".
    if prev[dictkey_items] != dev.pathdict[dictkey_items]:
        return store_copy()

    # Merge with no override: keys already in `prev` keep their value.
    try:
        for key, value in dev.pathdict.items():
            prev.setdefault(key, value)
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        merged = False
    else:
        merged = True

    if merged:
        prev[dictkey_type] = 'fs'
        dev.pathdict.clear()
    else:
        message("could not merge stroke and fill path")
        store_copy()
def jm_bbox_add_rect(dev, ctx, rect, code):
    """Append a bbox entry to `dev.result`.

    The entry is `(code, rect)`, with `rect` converted by
    `JM_py_from_rect()`. If `dev.layers` is truthy, `dev.layer_name` is
    added as a third element. `ctx` is not used.
    """
    if dev.layers:
        entry = (code, JM_py_from_rect(rect), dev.layer_name)
    else:
        entry = (code, JM_py_from_rect(rect))
    dev.result.append(entry)
def jm_bbox_fill_image(dev, ctx, image, ctm, alpha, color_params):
    """Record the bbox of an image as a "fill-image" entry.

    The bbox is the unit rectangle transformed by `ctm`.
    """
    # Keep the unit rect bound to a name while its internal struct is in use.
    unit_rect = mupdf.FzRect(mupdf.FzRect.Fixed_UNIT)
    bbox = mupdf.ll_fz_transform_rect(unit_rect.internal(), ctm)
    jm_bbox_add_rect(dev, ctx, bbox, "fill-image")
def jm_bbox_fill_image_mask(dev, ctx, image, ctm, colorspace, color, alpha, color_params):
    """Record the bbox of an image mask as a "fill-imgmask" entry.

    The bbox is `mupdf.fz_unit_rect` transformed by `ctm`. Exceptions are
    reported via `exception_info()` if `g_exceptions_verbose` is set, then
    re-raised.
    """
    try:
        bbox = mupdf.ll_fz_transform_rect(mupdf.fz_unit_rect, ctm)
        jm_bbox_add_rect(dev, ctx, bbox, "fill-imgmask")
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        raise
def jm_bbox_fill_path(dev, ctx, path, even_odd, ctm, colorspace, color, alpha, color_params):
    """Record the bbox of a filled path as a "fill-path" entry.

    The bbox is computed by `mupdf.ll_fz_bound_path()` without stroke
    state. Exceptions are reported via `exception_info()` if
    `g_exceptions_verbose` is set, then re-raised.
    """
    even_odd = bool(even_odd)
    try:
        bbox = mupdf.ll_fz_bound_path(path, None, ctm)
        jm_bbox_add_rect(dev, ctx, bbox, "fill-path")
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        raise
def jm_bbox_fill_shade(dev, ctx, shade, ctm, alpha, color_params):
    """Record the bbox of a shading as a "fill-shade" entry.

    The bbox is computed by `mupdf.ll_fz_bound_shade()`. Exceptions are
    reported via `exception_info()` if `g_exceptions_verbose` is set, then
    re-raised.
    """
    try:
        bbox = mupdf.ll_fz_bound_shade(shade, ctm)
        jm_bbox_add_rect(dev, ctx, bbox, "fill-shade")
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        raise
def jm_bbox_stroke_text(dev, ctx, text, stroke, ctm, *args):
    """Record the bbox of stroked text as a "stroke-text" entry.

    The bbox is computed by `mupdf.ll_fz_bound_text()` using `stroke`.
    Extra positional arguments are accepted and ignored. Exceptions are
    reported via `exception_info()` if `g_exceptions_verbose` is set, then
    re-raised.
    """
    try:
        bbox = mupdf.ll_fz_bound_text(text, stroke, ctm)
        jm_bbox_add_rect(dev, ctx, bbox, "stroke-text")
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        raise
def jm_bbox_fill_text(dev, ctx, text, ctm, *args):
    """Record the bbox of filled text as a "fill-text" entry.

    The bbox is computed by `mupdf.ll_fz_bound_text()` without stroke
    state. Extra positional arguments are accepted and ignored. Exceptions
    are reported via `exception_info()` if `g_exceptions_verbose` is set,
    then re-raised.
    """
    try:
        bbox = mupdf.ll_fz_bound_text(text, None, ctm)
        jm_bbox_add_rect(dev, ctx, bbox, "fill-text")
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        raise
def jm_bbox_ignore_text(dev, ctx, text, ctm):
    """Record the bbox of ignored text as an "ignore-text" entry.

    The bbox is computed by `mupdf.ll_fz_bound_text()` without stroke state.
    """
    bbox = mupdf.ll_fz_bound_text(text, None, ctm)
    jm_bbox_add_rect(dev, ctx, bbox, "ignore-text")
def jm_bbox_stroke_path(dev, ctx, path, stroke, ctm, colorspace, color, alpha, color_params):
    """Record the bbox of a stroked path as a "stroke-path" entry.

    The bbox is computed by `mupdf.ll_fz_bound_path()` using `stroke`.
    Exceptions are reported via `exception_info()` if
    `g_exceptions_verbose` is set, then re-raised.
    """
    try:
        bbox = mupdf.ll_fz_bound_path(path, stroke, ctm)
        jm_bbox_add_rect(dev, ctx, bbox, "stroke-path")
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        raise
def jm_checkquad(dev):
    '''
    Check whether the last 4 lines represent a quad.

    Because of how we count, the lines are a polyline already, i.e. last point
    of a line equals 1st point of next line.
    So we check for a polygon (last line's end point equals start point).

    Returns 1 if the last 4 items were replaced by one "qu" item (the line
    count is then reset), otherwise 0 with the items left untouched.
    '''
    items = dev.pathdict[dictkey_items]
    count = len(items)

    # x / y of the 4 line start points, in path order.
    coords = []
    for index in range(count - 4, count):
        segment = items[index]
        start = JM_point_from_py(segment[1])
        coords.append(start.x)
        coords.append(start.y)
        end = JM_point_from_py(segment[2])  # after the loop: end of last line

    if end.x != coords[0] or end.y != coords[1]:
        # not a polygon!
        return 0

    # we have detected a quad
    dev.linecount = 0   # reset this
    # a quad item is ("qu", (ul, ur, ll, lr)), where the tuple items
    # are pairs of floats representing a quad corner each.
    # relationship of float array to quad points:
    # (0, 1) = ul, (2, 3) = ll, (6, 7) = ur, (4, 5) = lr
    quad = mupdf.fz_make_quad(
        coords[0], coords[1],
        coords[6], coords[7],
        coords[2], coords[3],
        coords[4], coords[5],
    )
    items[count - 4] = ('qu', JM_py_from_quad(quad))  # replace item -4
    del items[count - 3:count]  # delete remaining 3 items
    return 1
def jm_checkrect(dev):
    '''
    Check whether the last 3 path items represent a rectangle.

    The line count is reset in any case. If the first and third of these
    lines form two opposite, axis-parallel sides, the 3 items are replaced
    by one item ("re", rect, orientation).

    Returns 1 if we have modified the path, otherwise 0.
    '''
    dev.linecount = 0   # reset line count
    items = dev.pathdict[dictkey_items]
    count = len(items)

    first_line = items[count - 3]
    ll = JM_point_from_py(first_line[1])
    lr = JM_point_from_py(first_line[2])
    # no need to extract the middle line!
    third_line = items[count - 1]
    ur = JM_point_from_py(third_line[1])
    ul = JM_point_from_py(third_line[2])

    # Assumption:
    # When decomposing rects, MuPDF always starts with a horizontal line,
    # followed by a vertical line, followed by a horizontal line.
    # First line: (ll, lr), third line: (ul, ur).
    is_rect = (
        ll.y == lr.y
        and ll.x == ul.x
        and ur.y == ul.y
        and ur.x == lr.x
    )
    if not is_rect:
        return 0    # not a rectangle

    # we have a rect, replace last 3 "l" items by one "re" item.
    # If 1st line is below 3rd line, we record anti-clockwise (+1), else
    # clockwise (-1) orientation.
    if ul.y < lr.y:
        rect = mupdf.fz_make_rect(ul.x, ul.y, lr.x, lr.y)
        orientation = 1
    else:
        rect = mupdf.fz_make_rect(ll.x, ll.y, ur.x, ur.y)
        orientation = -1
    items[count - 3] = ('re', JM_py_from_rect(rect), orientation)
    del items[count - 2:count]  # delete remaining 2 items
    return 1
def jm_trace_text(dev, text, type_, ctm, colorspace, color, alpha, seqno):
    """Call `jm_trace_text_span()` for every span of `text`.

    The spans are reached by starting at `text.head` and following each
    span's `next` until a falsy value is found. All other arguments are
    passed on unchanged.
    """
    span = text.head
    while span:
        jm_trace_text_span(dev, span, type_, ctm, colorspace, color, alpha, seqno)
        span = span.next
def jm_trace_text_span(dev, span, type_, ctm, colorspace, color, alpha, seqno):
    '''
    Build a dictionary describing one text span and append it to `dev.out`.

    C signature this was ported from:
    jm_trace_text_span(fz_context *ctx, PyObject *out, fz_text_span *span, int type, fz_matrix ctm, fz_colorspace *colorspace, const float *color, float alpha, size_t seqno)

    Args:
        dev: device object; `linewidth`, `layer_name` and `out` are used.
        span: a `mupdf.fz_text_span` (asserted).
        type_: stored unchanged under key "type".
        ctm: a `mupdf.fz_matrix` (asserted).
        colorspace, color: if `colorspace` is truthy, `color` is converted
            to RGB, otherwise the color is (0, 0, 0).
        alpha: stored unchanged under key "opacity".
        seqno: stored unchanged under key "seqno".
    '''
    out_font = None
    assert isinstance( span, mupdf.fz_text_span)
    span = mupdf.FzTextSpan( span)
    assert isinstance( ctm, mupdf.fz_matrix)
    ctm = mupdf.FzMatrix( ctm)
    fontname = JM_font_name( span.font())
    #float rgb[3];
    #PyObject *chars = PyTuple_New(span->len);
    mat = mupdf.fz_concat(span.trm(), ctm)  # text transformation matrix
    dir = mupdf.fz_transform_vector(mupdf.fz_make_point(1, 0), mat) # writing direction
    # Length of the transformed unit vector, taken before normalization.
    fsize = math.sqrt(dir.x * dir.x + dir.y * dir.y)  # font size
    dir = mupdf.fz_normalize_vector(dir)
    space_adv = 0
    asc = JM_font_ascender( span.font())
    dsc = JM_font_descender( span.font())
    if asc < 1e-3:  # probably Tesseract font
        dsc = -0.1
        asc = 0.9
    # compute effective ascender / descender
    ascsize = asc * fsize / (asc - dsc)
    dscsize = dsc * fsize / (asc - dsc)
    # Each property test result is multiplied by its flag value and summed.
    fflags = 0  # font flags
    mono = mupdf.fz_font_is_monospaced( span.font())
    fflags += mono * TEXT_FONT_MONOSPACED
    fflags += mupdf.fz_font_is_italic( span.font()) * TEXT_FONT_ITALIC
    fflags += mupdf.fz_font_is_serif( span.font()) * TEXT_FONT_SERIFED
    fflags += mupdf.fz_font_is_bold( span.font()) * TEXT_FONT_BOLD
    last_adv = 0
    # walk through characters of span
    span_bbox = mupdf.FzRect()
    # Matrix built from the writing direction; applied to each character
    # bbox around the character's origin (see m1 below).
    rot = mupdf.fz_make_matrix(dir.x, dir.y, -dir.y, dir.x, 0, 0)
    if dir.x == -1: # left-right flip
        rot.d = 1
    chars = []
    for i in range( span.m_internal.len):
        # Advance of this glyph, scaled by font size; stays 0 if gid < 0.
        adv = 0
        if span.items(i).gid >= 0:
            adv = mupdf.fz_advance_glyph( span.font(), span.items(i).gid, span.m_internal.wmode)
        adv *= fsize
        last_adv = adv
        if span.items(i).ucs == 32:
            # Remember the width of a space character of this span.
            space_adv = adv
        char_orig = mupdf.fz_make_point(span.items(i).x, span.items(i).y)
        char_orig = mupdf.fz_transform_point(char_orig, ctm)
        # m1: move origin to (0, 0), apply `rot`, move back.
        m1 = mupdf.fz_make_matrix(1, 0, 0, 1, -char_orig.x, -char_orig.y)
        m1 = mupdf.fz_concat(m1, rot)
        m1 = mupdf.fz_concat(m1, mupdf.FzMatrix(1, 0, 0, 1, char_orig.x, char_orig.y))
        x0 = char_orig.x
        x1 = x0 + adv
        if (
                (mat.d > 0 and (dir.x == 1 or dir.x == -1))
                or
                (mat.b != 0 and mat.b == -mat.c)
                ):  # up-down flip
            y0 = char_orig.y + dscsize
            y1 = char_orig.y + ascsize
        else:
            y0 = char_orig.y - ascsize
            y1 = char_orig.y - dscsize
        char_bbox = mupdf.fz_make_rect(x0, y0, x1, y1)
        char_bbox = mupdf.fz_transform_rect(char_bbox, m1)
        # One entry per character: (ucs, gid, origin, bbox).
        chars.append(
                (
                    span.items(i).ucs,
                    span.items(i).gid,
                    (
                        char_orig.x,
                        char_orig.y,
                    ),
                    (
                        char_bbox.x0,
                        char_bbox.y0,
                        char_bbox.x1,
                        char_bbox.y1,
                    ),
                )
                )
        # The span bbox starts as the first character's bbox and is then
        # extended by every following one.
        if i > 0:
            span_bbox = mupdf.fz_union_rect(span_bbox, char_bbox)
        else:
            span_bbox = char_bbox
    chars = tuple(chars)
    # No space character seen in this span: determine a space width.
    if not space_adv:
        if not (fflags & TEXT_FONT_MONOSPACED):
            c, out_font = mupdf.fz_encode_character_with_fallback( span.font(), 32, 0, 0)
            space_adv = mupdf.fz_advance_glyph(
                    span.font(),
                    c,
                    span.m_internal.wmode,
                    )
            space_adv *= fsize
            if not space_adv:
                # Still nothing: fall back to the last character's advance.
                space_adv = last_adv
        else:
            space_adv = last_adv    # for mono, any char width suffices
    # make the span dictionary
    span_dict = dict()
    span_dict[ 'dir'] = JM_py_from_point(dir)
    span_dict[ 'font'] = JM_EscapeStrFromStr(fontname)
    span_dict[ 'wmode'] = span.m_internal.wmode
    span_dict[ 'flags'] =fflags
    span_dict[ "bidi_lvl"] =span.m_internal.bidi_level
    span_dict[ "bidi_dir"] = span.m_internal.markup_dir
    span_dict[ 'ascender'] = asc
    span_dict[ 'descender'] = dsc
    # The color stored below always has 3 components.
    span_dict[ 'colorspace'] = 3
    if colorspace:
        rgb = mupdf.fz_convert_color(
                mupdf.FzColorspace( mupdf.ll_fz_keep_colorspace( colorspace)),
                color,
                mupdf.fz_device_rgb(),
                mupdf.FzColorspace(),
                mupdf.FzColorParams(),
                )
        rgb = rgb[:3]   # mupdf.fz_convert_color() always returns 4 items.
    else:
        rgb = (0, 0, 0)
    if dev.linewidth > 0:   # width of character border
        linewidth = dev.linewidth
    else:
        linewidth = fsize * 0.05    # default: 5% of font size
    #log(f'{dev.linewidth=:.4f} {fsize=:.4f} {linewidth=:.4f}')
    span_dict[ 'color'] = rgb
    span_dict[ 'size'] = fsize
    span_dict[ "opacity"] = alpha
    span_dict[ "linewidth"] = linewidth
    span_dict[ "spacewidth"] = space_adv
    span_dict[ 'type'] = type_
    span_dict[ 'bbox'] = JM_py_from_rect(span_bbox)
    span_dict[ 'layer'] = dev.layer_name
    span_dict[ "seqno"] = seqno
    span_dict[ 'chars'] = chars
    #log(f'{span_dict=}')
    dev.out.append( span_dict)
def jm_lineart_color(colorspace, color):
    """Convert `color` from `colorspace` to an RGB triple.

    Returns an empty tuple if `colorspace` is falsy, otherwise the first
    three items of the conversion result. Exceptions raised by the
    conversion are reported via `exception_info()` if
    `g_exceptions_verbose` is set, then re-raised.
    """
    if not colorspace:
        return ()
    try:
        # Need to be careful to use a named Python object to ensure
        # that the `params` we pass to mupdf.ll_fz_convert_color() is
        # valid. E.g. doing:
        #
        #   rgb = mupdf.ll_fz_convert_color(..., mupdf.FzColorParams().internal())
        #
        # - seems to end up with a corrupted `params`.
        #
        target = mupdf.FzColorspace(mupdf.FzColorspace.Fixed_RGB)
        params = mupdf.FzColorParams()
        converted = mupdf.ll_fz_convert_color(
            colorspace,
            color,
            target.m_internal,
            None,
            params.internal(),
        )
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        raise
    return converted[:3]
def jm_lineart_drop_device(dev, ctx):
    """Reset the collected output of a lineart device.

    `dev.scissors` always becomes a new empty list. `dev.out` is replaced
    by a new empty list only if it currently is a list. `ctx` is not used.
    """
    dev.scissors = []
    if isinstance(dev.out, list):
        dev.out = []
def jm_lineart_fill_path(dev, ctx, path, even_odd, ctm, colorspace, color, alpha, color_params):
    """Device callback for a filled path.

    Lets `jm_lineart_path()` build `dev.pathdict` for `path`, completes it
    with the fill properties and hands it to `jm_append_merge()`. Then
    `dev.seqno` is incremented. Nothing is stored, and the sequence number
    is left unchanged, if `dev.pathdict` is None afterwards.

    Exceptions are reported via `exception_info()` if `g_exceptions_verbose`
    is set, then re-raised.
    """
    even_odd = bool(even_odd)
    try:
        assert isinstance(ctm, mupdf.fz_matrix)
        dev.ctm = mupdf.FzMatrix(ctm)  # fz_concat(ctm, dev_ptm);
        dev.path_type = trace_device_FILL_PATH
        jm_lineart_path(dev, ctx, path)
        if dev.pathdict is None:
            return
        #item_count = len(dev.pathdict[ dictkey_items])
        #if item_count == 0:
        #    return
        pathdict = dev.pathdict
        pathdict[dictkey_type] = "f"
        pathdict["even_odd"] = even_odd
        pathdict["fill_opacity"] = alpha
        #dev.pathdict[ "closePath"] = False
        pathdict["fill"] = jm_lineart_color(colorspace, color)
        pathdict[dictkey_rect] = JM_py_from_rect(dev.pathrect)
        pathdict["seqno"] = dev.seqno
        pathdict['layer'] = dev.layer_name
        if dev.clips:
            pathdict['level'] = dev.depth
        jm_append_merge(dev)
        dev.seqno += 1
    except Exception:
        if g_exceptions_verbose:
            exception_info()
        raise
+# There are 3 text trace types:
+# 0 - fill text (PDF Tr 0)
+# 1 - stroke text (PDF Tr 1)
+# 3 - ignore text (PDF Tr 3)
def jm_lineart_fill_text(dev, ctx, text, ctm, colorspace, color, alpha, color_params):
    """Device callback for filled text.

    Traces `text` with trace type 0 and the current `dev.seqno`, then
    increments `dev.seqno`.
    """
    if 0:
        # Disabled diagnostics of the callback arguments.
        log(f'{type(ctx)=} {ctx=}')
        log(f'{type(dev)=} {dev=}')
        log(f'{type(text)=} {text=}')
        log(f'{type(ctm)=} {ctm=}')
        log(f'{type(colorspace)=} {colorspace=}')
        log(f'{type(color)=} {color=}')
        log(f'{type(alpha)=} {alpha=}')
        log(f'{type(color_params)=} {color_params=}')
    trace_type = 0  # fill text (PDF Tr 0)
    jm_trace_text(dev, text, trace_type, ctm, colorspace, color, alpha, dev.seqno)
    dev.seqno += 1
def jm_lineart_ignore_text(dev, text, ctm):
    """Trace ignored text.

    Traces `text` with trace type 3, no colorspace, no color and opacity 1,
    using the current `dev.seqno`, then increments `dev.seqno`.

    NOTE(review): unlike jm_lineart_fill_text() and jm_bbox_ignore_text(),
    this function has no `ctx` parameter - confirm that this matches how the
    device invokes it.
    """
    trace_type = 3  # ignore text (PDF Tr 3)
    jm_trace_text(dev, text, trace_type, ctm, None, None, 1, dev.seqno)
    dev.seqno += 1
+class Walker(mupdf.FzPathWalker2):
+def __init__(self, dev):
+super().__init__()
+self.use_virtual_moveto()
+self.use_virtual_lineto()
+self.use_virtual_curveto()
+self.use_virtual_closepath()
+self.dev = dev
+def closepath(self, ctx):    # trace_close().
+#log(f'Walker(): {self.dev.pathdict=}')
+try:
+if self.dev.linecount == 3:
+if jm_checkrect(self.dev):
+#log(f'end1: {self.dev.pathdict=}')
+return
+self.dev.linecount = 0   # reset # of consec. lines
+if self.dev.havemove:
+if self.dev.lastpoint != self.dev.firstpoint:
+item = ("l", JM_py_from_point(self.dev.lastpoint),
+JM_py_from_point(self.dev.firstpoint))
+self.dev.pathdict[dictkey_items].append(item)
+self.dev.lastpoint = self.dev.firstpoint
+self.dev.pathdict["closePath"] = False
+else:
+#log('setting self.dev.pathdict[ "closePath"] to true')
+self.dev.pathdict[ "closePath"] = True
+#log(f'end2: {self.dev.pathdict=}')
+self.dev.havemove = 0
+except Exception:
+if g_exceptions_verbose:    exception_info()
+raise
+def curveto(self, ctx, x1, y1, x2, y2, x3, y3):   # trace_curveto().
+#log(f'Walker(): {self.dev.pathdict=}')
+try:
+self.dev.linecount = 0  # reset # of consec. lines
+p1 = mupdf.fz_make_point(x1, y1)
+p2 = mupdf.fz_make_point(x2, y2)
+p3 = mupdf.fz_make_point(x3, y3)
+p1 = mupdf.fz_transform_point(p1, self.dev.ctm)
+p2 = mupdf.fz_transform_point(p2, self.dev.ctm)
+p3 = mupdf.fz_transform_point(p3, self.dev.ctm)
+self.dev.pathrect = mupdf.fz_include_point_in_rect(self.dev.pathrect, p1)
+self.dev.pathrect = mupdf.fz_include_point_in_rect(self.dev.pathrect, p2)
+self.dev.pathrect = mupdf.fz_include_point_in_rect(self.dev.pathrect, p3)
+list_ = (
+"c",
+JM_py_from_point(self.dev.lastpoint),
+JM_py_from_point(p1),
+JM_py_from_point(p2),
+JM_py_from_point(p3),
+)
+self.dev.lastpoint = p3
+self.dev.pathdict[ dictkey_items].append( list_)
+except Exception:
+if g_exceptions_verbose:    exception_info()
+raise
def lineto(self, ctx, x, y):   # trace_lineto().
    '''
    Path-walker callback for a straight line segment.

    Appends an "l" item from the current point to the transformed target
    point, extends the path rectangle and counts the line. After the 4th
    consecutive line of a non-fill path, jm_checkquad() is invoked so the
    lines may be replaced by a "re" or "qu" item.
    '''
    try:
        dev = self.dev
        target = mupdf.fz_transform_point(mupdf.fz_make_point(x, y), dev.ctm)
        dev.pathrect = mupdf.fz_include_point_in_rect(dev.pathrect, target)
        segment = ('l', JM_py_from_point(dev.lastpoint), JM_py_from_point(target))
        dev.lastpoint = target
        dev.pathdict[dictkey_items].append(segment)
        dev.linecount += 1  # number of consecutive lines so far
        if dev.linecount == 4 and dev.path_type != trace_device_FILL_PATH:
            jm_checkquad(dev)
    except Exception:
        if g_exceptions_verbose:    exception_info()
        raise
def moveto(self, ctx, x, y):   # trace_moveto().
    '''
    Path-walker callback starting a new sub-path.

    The transformed point becomes both the current and the first point of
    the sub-path. If the path rectangle is still infinite it is initialised
    to the degenerate rectangle at that point.
    '''
    try:
        dev = self.dev
        start = mupdf.fz_transform_point(mupdf.fz_make_point(x, y), dev.ctm)
        dev.lastpoint = start
        if mupdf.fz_is_infinite_rect(dev.pathrect):
            dev.pathrect = mupdf.fz_make_rect(start.x, start.y, start.x, start.y)
        dev.firstpoint = start
        dev.havemove = 1
        dev.linecount = 0  # a move ends any run of consecutive lines
    except Exception:
        if g_exceptions_verbose:    exception_info()
        raise
def jm_lineart_path(dev, ctx, path):
    '''
    Build the "items" list of a fresh path dictionary on `dev`.

    Resets path rectangle, current point and line counter, walks `path`
    with a Walker bound to `dev`, and sets `dev.pathdict` to None if the
    walk produced no items.
    '''
    try:
        dev.pathrect = mupdf.FzRect(mupdf.FzRect.Fixed_INFINITE)
        dev.linecount = 0
        dev.lastpoint = mupdf.FzPoint(0, 0)
        dev.pathdict = {dictkey_items: []}

        # Creating the first Walker instance is slow (around 0.3s), later
        # ones take around 0.01ms; this only holds while Walker is defined
        # at module level.
        walker = Walker(dev)

        # fz_path_walker callbacks receive an arbitrary void* rather than
        # the struct itself; the C++ Director callbacks use that pointer to
        # find the walker, hence arg=walker.m_internal.
        kept_path = mupdf.FzPath(mupdf.ll_fz_keep_path(path))
        mupdf.fz_walk_path(kept_path, walker, walker.m_internal)

        if not dev.pathdict[dictkey_items]:
            dev.pathdict = None  # nothing was collected
    except Exception:
        if g_exceptions_verbose:    exception_info()
        raise
def jm_lineart_stroke_path( dev, ctx, path, stroke, ctm, colorspace, color, alpha, color_params):
    '''
    Device callback for a stroked path.

    Collects the path items via jm_lineart_path() and, if any were found,
    completes the path dictionary with stroke properties (type 's', colour,
    opacity, width, caps, join, dashes, rectangle, layer, sequence number)
    before handing it to jm_append_merge(). `dev.seqno` is incremented only
    when a path dictionary was produced.

    The 'dashes' entry is always a str: '[] 0' for solid lines, otherwise
    '[ <len> <len> ... ] <phase>' with all values scaled by the path factor.
    '''
    try:
        assert isinstance( ctm, mupdf.fz_matrix)
        # Scale factor applied to widths and dash lengths; only taken from
        # the matrix when both axes are scaled by the same absolute amount.
        dev.pathfactor = 1
        if ctm.a != 0 and abs(ctm.a) == abs(ctm.d):
            dev.pathfactor = abs(ctm.a)
        elif ctm.b != 0 and abs(ctm.b) == abs(ctm.c):
            dev.pathfactor = abs(ctm.b)
        dev.ctm = mupdf.FzMatrix( ctm)  # fz_concat(ctm, dev_ptm);
        dev.path_type = trace_device_STROKE_PATH

        jm_lineart_path( dev, ctx, path)
        if dev.pathdict is None:
            return
        dev.pathdict[ dictkey_type] = 's'
        dev.pathdict[ 'stroke_opacity'] = alpha
        dev.pathdict[ 'color'] = jm_lineart_color( colorspace, color)
        dev.pathdict[ dictkey_width] = dev.pathfactor * stroke.linewidth
        dev.pathdict[ 'lineCap'] = (
                stroke.start_cap,
                stroke.dash_cap,
                stroke.end_cap,
                )
        dev.pathdict[ 'lineJoin'] = dev.pathfactor * stroke.linejoin
        if 'closePath' not in dev.pathdict:
            dev.pathdict['closePath'] = False

        # Build the "dashes" string directly as a Python str so that both
        # branches store the same type.
        if stroke.dash_len:
            # mupdf.floats_getitem() is the SWIG accessor for the C array
            # `float *stroke.dash_list`.
            lengths = ''.join(
                f'{_format_g(dev.pathfactor * mupdf.floats_getitem( stroke.dash_list, i))} '
                for i in range( stroke.dash_len)
            )
            phase = _format_g(dev.pathfactor * stroke.dash_phase)
            dev.pathdict[ 'dashes'] = f'[ {lengths}] {phase}'
        else:
            dev.pathdict[ 'dashes'] = '[] 0'

        dev.pathdict[ dictkey_rect] = JM_py_from_rect(dev.pathrect)
        dev.pathdict['layer'] = dev.layer_name
        dev.pathdict[ 'seqno'] = dev.seqno
        if dev.clips:
            dev.pathdict[ 'level'] = dev.depth
        jm_append_merge(dev)
        dev.seqno += 1
    except Exception:
        if g_exceptions_verbose:    exception_info()
        raise
def jm_lineart_clip_path(dev, ctx, path, even_odd, ctm, scissor):
    '''
    Device callback for a clip path.

    Does nothing unless clip tracking (`dev.clips`) is enabled. Otherwise
    records the path as a 'clip' entry with its scissor rectangle, level and
    layer, and increases the nesting depth by one.
    '''
    if not dev.clips:
        return
    dev.ctm = mupdf.FzMatrix(ctm)    # fz_concat(ctm, trace_device_ptm);
    dev.path_type = trace_device_CLIP_PATH
    jm_lineart_path(dev, ctx, path)
    clip = dev.pathdict
    if clip is None:
        return
    clip[dictkey_type] = 'clip'
    clip['even_odd'] = bool(even_odd)
    clip.setdefault('closePath', False)
    clip['scissor'] = JM_py_from_rect(compute_scissor(dev))
    clip['level'] = dev.depth
    clip['layer'] = dev.layer_name
    jm_append_merge(dev)
    dev.depth += 1
def jm_lineart_clip_stroke_path(dev, ctx, path, stroke, ctm, scissor):
    '''
    Device callback for a clip defined by a stroked path.

    Does nothing unless clip tracking (`dev.clips`) is enabled. Otherwise
    records the path as a 'clip' entry (with 'even_odd' set to None), adds
    scissor rectangle, level and layer, and increases the nesting depth.
    '''
    if not dev.clips:
        return
    dev.ctm = mupdf.FzMatrix(ctm)    # fz_concat(ctm, trace_device_ptm);
    dev.path_type = trace_device_CLIP_STROKE_PATH
    jm_lineart_path(dev, ctx, path)
    if dev.pathdict is None:
        return
    # Use the shared key constant (as jm_lineart_clip_path() does), not the
    # literal string 'dictkey_type'.
    dev.pathdict[dictkey_type] = 'clip'
    dev.pathdict['even_odd'] = None
    if 'closePath' not in dev.pathdict:
        dev.pathdict['closePath'] = False
    dev.pathdict['scissor'] = JM_py_from_rect(compute_scissor(dev))
    dev.pathdict['level'] = dev.depth
    dev.pathdict['layer'] = dev.layer_name
    jm_append_merge(dev)
    dev.depth += 1
def jm_lineart_clip_stroke_text(dev, ctx, text, stroke, ctm, scissor):
    '''With clip tracking enabled: push a scissor and go one level deeper.'''
    if dev.clips:
        compute_scissor(dev)
        dev.depth += 1
def jm_lineart_clip_text(dev, ctx, text, ctm, scissor):
    '''With clip tracking enabled: push a scissor and go one level deeper.'''
    if dev.clips:
        compute_scissor(dev)
        dev.depth += 1
def jm_lineart_clip_image_mask( dev, ctx, image, ctm, scissor):
    '''With clip tracking enabled: push a scissor and go one level deeper.'''
    if dev.clips:
        compute_scissor(dev)
        dev.depth += 1
def jm_lineart_pop_clip(dev, ctx):
    '''
    Leave the innermost clip: drop the last scissor and decrease the depth.

    Nothing happens if clip tracking is off or no scissor is stored.
    '''
    if dev.clips and dev.scissors:
        del dev.scissors[-1]
        dev.depth -= 1
def jm_lineart_begin_layer(dev, ctx, name):
    '''Remember `name` as the current layer name; a falsy name becomes "".'''
    dev.layer_name = name or ""
def jm_lineart_end_layer(dev, ctx):
    '''Reset the current layer name to the empty string.'''
    dev.layer_name = ""
def jm_lineart_begin_group(dev, ctx, bbox, cs, isolated, knockout, blendmode, alpha):
    '''
    Device callback opening a group.

    Only active when clip tracking is enabled: stores a "group" dictionary
    as the current path dictionary, passes it on via jm_append_merge() and
    increases the nesting depth.
    '''
    if not dev.clips:
        return
    # Py_BuildValue("{s:s,s:N,s:N,s:N,s:s,s:f,s:i,s:N}", ...
    dev.pathdict = dict(
        type="group",
        rect=JM_py_from_rect(bbox),
        isolated=bool(isolated),
        knockout=bool(knockout),
        blendmode=mupdf.fz_blendmode_name(blendmode),
        opacity=alpha,
        level=dev.depth,
        layer=dev.layer_name,
    )
    jm_append_merge(dev)
    dev.depth += 1
def jm_lineart_end_group(dev, ctx):
    '''Close a group: decrease the depth if clip tracking is enabled.'''
    if dev.clips:
        dev.depth -= 1
def jm_lineart_stroke_text(dev, ctx, text, stroke, ctm, colorspace, color, alpha, color_params):
    '''Trace stroked text via jm_trace_text() and advance the sequence number.'''
    # The literal 1 is forwarded unchanged as jm_trace_text()'s third
    # argument; presumably it marks the text as stroked - TODO confirm.
    text_type = 1
    jm_trace_text(dev, text, text_type, ctm, colorspace, color, alpha, dev.seqno)
    dev.seqno += 1
def jm_dev_linewidth( dev, ctx, path, stroke, matrix, colorspace, color, alpha, color_params):
    '''Remember the stroke's line width on the device, then count the command.'''
    width = stroke.linewidth
    dev.linewidth = width
    jm_increase_seqno( dev, ctx)
def jm_increase_seqno( dev, ctx, *vargs):
    '''
    Generic device callback that only counts the command.

    Any further positional arguments are accepted and ignored, so the
    function can serve as the handler for callbacks of differing signatures.
    '''
    try:
        dev.seqno = dev.seqno + 1
    except Exception:
        if g_exceptions_verbose:    exception_info()
        raise
def planish_line(p1: point_like, p2: point_like) -> Matrix:
    """Compute the matrix mapping the line from p1 to p2 onto the x-axis.

    The mapping keeps the line's length and sends p1 to Point(0, 0).

    Args:
        p1, p2: point_like
    Returns:
        Matrix which maps p1 to Point(0, 0) and p2 to a point on the x axis
        at the same distance to Point(0, 0). The computation is delegated to
        util_hor_matrix().
    """
    start = Point(p1)
    end = Point(p2)
    return Matrix(util_hor_matrix(start, end))
class JM_image_reporter_Filter(mupdf.PdfFilterOptions2):
    '''
    Filter options whose virtual image filter forwards to JM_image_filter().
    '''
    def __init__(self):
        super().__init__()
        self.use_virtual_image_filter()

    def image_filter( self, ctx, ctm, name, image):
        assert isinstance(ctm, mupdf.fz_matrix)
        JM_image_filter(self, mupdf.FzMatrix(ctm), name, image)
        # cppyy doesn't appear to treat returned None as nullptr, resulting
        # in an obscure 'python exception' exception; return 0 there.
        return 0 if mupdf_cppyy else None
class JM_new_bbox_device_Device(mupdf.FzDevice2):
    '''
    Device that routes drawing callbacks to the jm_bbox_* handlers and
    layer callbacks to the jm_lineart_*_layer handlers.
    '''
    def __init__(self, result, layers):
        super().__init__()
        self.result = result
        self.layers = layers
        self.layer_name = ""
        # Enable the virtual callbacks, in the same order as before.
        for hook in (
                'fill_path',
                'stroke_path',
                'fill_text',
                'stroke_text',
                'ignore_text',
                'fill_shade',
                'fill_image',
                'fill_image_mask',
                'begin_layer',
                'end_layer',
                ):
            getattr(self, f'use_virtual_{hook}')()

    begin_layer = jm_lineart_begin_layer
    end_layer = jm_lineart_end_layer

    fill_path = jm_bbox_fill_path
    stroke_path = jm_bbox_stroke_path
    fill_text = jm_bbox_fill_text
    stroke_text = jm_bbox_stroke_text
    ignore_text = jm_bbox_ignore_text
    fill_shade = jm_bbox_fill_shade
    fill_image = jm_bbox_fill_image
    fill_image_mask = jm_bbox_fill_image_mask
class JM_new_output_fileptr_Output(mupdf.FzOutput2):
    '''
    MuPDF output that forwards write/seek/tell/truncate to the Python
    file-like object `bio`.
    '''
    def __init__(self, bio):
        super().__init__()
        self.bio = bio
        for hook in ('write', 'seek', 'tell', 'truncate'):
            getattr(self, f'use_virtual_{hook}')()

    def seek( self, ctx, offset, whence):
        return self.bio.seek( offset, whence)

    def tell( self, ctx):
        return self.bio.tell()

    def truncate( self, ctx):
        return self.bio.truncate()

    def write(self, ctx, data_raw, data_length):
        # Convert the raw buffer handed over by MuPDF into Python bytes.
        chunk = mupdf.raw_to_python_bytes(data_raw, data_length)
        return self.bio.write(chunk)
def compute_scissor(dev):
    '''
    Compute the scissor of the current clip and push it on `dev.scissors`.

    Every scissor of a clip is a sub rectangle of the preceding clip scissor
    if the clip level is larger: the new scissor is the current path
    rectangle intersected with the last stored scissor, or the path
    rectangle itself if none is stored yet. Returns the new scissor.
    '''
    if dev.scissors is None:
        dev.scissors = list()
    if dev.scissors:
        enclosing = JM_rect_from_py(dev.scissors[-1])
        scissor = mupdf.fz_intersect_rect(enclosing, dev.pathrect)
    else:
        scissor = dev.pathrect
    dev.scissors.append(JM_py_from_rect(scissor))
    return scissor
class JM_new_lineart_device_Device(mupdf.FzDevice2):
    '''
    LINEART device for Python method Page.get_cdrawings()

    Path and clip callbacks are handled by the jm_lineart_* functions; text,
    shade and image callbacks only advance the sequence number via
    jm_increase_seqno().
    '''
    def __init__(self, out, clips, method):
        super().__init__()
        # fixme: this results in "Unexpected call of unimplemented virtual_fnptrs fn FzDevice2::drop_device().".
        #self.use_virtual_drop_device()
        self.use_virtual_fill_path()
        self.use_virtual_stroke_path()
        self.use_virtual_clip_path()
        self.use_virtual_clip_image_mask()
        self.use_virtual_clip_stroke_path()
        self.use_virtual_clip_stroke_text()
        self.use_virtual_clip_text()

        # These three must be *called*; a bare attribute reference is a
        # no-op and would leave the text hooks assigned below disabled.
        self.use_virtual_fill_text()
        self.use_virtual_stroke_text()
        self.use_virtual_ignore_text()

        self.use_virtual_fill_shade()
        self.use_virtual_fill_image()
        self.use_virtual_fill_image_mask()

        self.use_virtual_pop_clip()

        self.use_virtual_begin_group()
        self.use_virtual_end_group()

        self.use_virtual_begin_layer()
        self.use_virtual_end_layer()

        self.out = out
        self.seqno = 0
        self.depth = 0
        self.clips = clips
        self.method = method

        self.scissors = None
        self.layer_name = ""  # optional content name

        self.linewidth = 0
        self.ptm = mupdf.FzMatrix()
        self.ctm = mupdf.FzMatrix()
        self.rot = mupdf.FzMatrix()
        self.lastpoint = mupdf.FzPoint()
        self.firstpoint = mupdf.FzPoint()
        self.havemove = 0
        self.pathrect = mupdf.FzRect()
        self.pathfactor = 0
        self.linecount = 0
        self.path_type = 0

    #drop_device = jm_lineart_drop_device

    fill_path           = jm_lineart_fill_path
    stroke_path         = jm_lineart_stroke_path
    clip_image_mask     = jm_lineart_clip_image_mask
    clip_path           = jm_lineart_clip_path
    clip_stroke_path    = jm_lineart_clip_stroke_path
    clip_text           = jm_lineart_clip_text
    clip_stroke_text    = jm_lineart_clip_stroke_text

    fill_text           = jm_increase_seqno
    stroke_text         = jm_increase_seqno
    ignore_text         = jm_increase_seqno

    fill_shade          = jm_increase_seqno
    fill_image          = jm_increase_seqno
    fill_image_mask     = jm_increase_seqno

    pop_clip            = jm_lineart_pop_clip

    begin_group         = jm_lineart_begin_group
    end_group           = jm_lineart_end_group

    begin_layer         = jm_lineart_begin_layer
    end_layer           = jm_lineart_end_layer
class JM_new_texttrace_device(mupdf.FzDevice2):
    '''
    Trace TEXT device for Python method Page.get_texttrace()

    Text callbacks go to the jm_lineart_*_text handlers; stroke_path records
    the line width; the remaining callbacks only count commands.
    '''
    def __init__(self, out):
        super().__init__()
        # Enable the virtual callbacks, in the same order as before.
        for hook in (
                'fill_path',
                'stroke_path',
                'fill_text',
                'stroke_text',
                'ignore_text',
                'fill_shade',
                'fill_image',
                'fill_image_mask',
                'begin_layer',
                'end_layer',
                ):
            getattr(self, f'use_virtual_{hook}')()

        self.out = out

        self.seqno = 0
        self.depth = 0
        self.clips = 0
        self.method = None

        self.pathdict = dict()
        self.scissors = list()
        self.linewidth = 0
        self.ptm = mupdf.FzMatrix()
        self.ctm = mupdf.FzMatrix()
        self.rot = mupdf.FzMatrix()
        self.lastpoint = mupdf.FzPoint()
        self.pathrect = mupdf.FzRect()
        self.pathfactor = 0
        self.linecount = 0
        self.path_type = 0
        self.layer_name = ""

    fill_path = jm_increase_seqno
    stroke_path = jm_dev_linewidth
    fill_text = jm_lineart_fill_text
    stroke_text = jm_lineart_stroke_text
    ignore_text = jm_lineart_ignore_text
    fill_shade = jm_increase_seqno
    fill_image = jm_increase_seqno
    fill_image_mask = jm_increase_seqno

    begin_layer = jm_lineart_begin_layer
    end_layer = jm_lineart_end_layer
def ConversionHeader(i: str, filename: OptStr ="unknown"):
    """Return the document header text for an output format.

    Args:
        i: format name, compared case-insensitively; one of "html", "json",
            "xml", "xhtml". Any other value yields an empty string.
        filename: document name inserted into the "json" and "xml" headers.
            It is escaped for the target format, so quotes, backslashes
            (e.g. Windows paths), '&' and '<' produce valid output.
    Returns:
        The header as a str. The "xml" and "xhtml" headers start directly
        with the XML declaration, because the declaration must not be
        preceded by whitespace; the "html" header keeps its leading newline.
    """
    import json as _json
    from xml.sax.saxutils import escape as _xml_escape

    kind = i.lower()
    name = "%s" % filename  # same str conversion as plain interpolation

    if kind == "html":
        return "\n".join((
            "",
            "<!DOCTYPE html>",
            "<html>",
            "<head>",
            "<style>",
            "body{background-color:gray}",
            "div{position:relative;background-color:white;margin:1em auto}",
            "p{position:absolute;margin:0}",
            "img{position:absolute}",
            "</style>",
            "</head>",
            "<body>",
            "",
        ))

    if kind == "json":
        # json.dumps() adds the surrounding quotes and escapes the content.
        return '{"document": %s, "pages": [\n' % _json.dumps(name, ensure_ascii=False)

    if kind == "xml":
        attr = _xml_escape(name, {'"': "&quot;"})
        return '<?xml version="1.0"?>\n<document name="%s">\n' % attr

    if kind == "xhtml":
        return "\n".join((
            '<?xml version="1.0"?>',
            '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">',
            '<html xmlns="http://www.w3.org/1999/xhtml">',
            "<head>",
            "<style>",
            "body{background-color:gray}",
            "div{background-color:white;margin:1em;padding:1em}",
            "p{white-space:pre-wrap}",
            "</style>",
            "</head>",
            "<body>",
            "",
        ))

    return ""
def ConversionTrailer(i: str):
    """Return the document trailer text for an output format.

    `i` is compared case-insensitively against "html", "json", "xml" and
    "xhtml"; any other value yields an empty string.
    """
    closing_html = "</body>\n</html>\n"
    trailers = {
        "html": closing_html,
        "json": "]\n}",
        "xml": "</document>\n",
        "xhtml": closing_html,
    }
    return trailers.get(i.lower(), "")
def annot_preprocess(page: "Page") -> int:
    """Prepare a page for annotation insertion.

    Checks the page's parent, requires a PDF document and, if the page is
    rotated, sets its rotation to 0.

    Returns:
        The rotation value the page had on entry.
    Raises:
        ValueError: if the owning document is not a PDF.
    """
    CheckParent(page)
    if not page.parent.is_pdf:
        raise ValueError("is no PDF")
    previous = page.rotation
    if previous != 0:
        page.set_rotation(0)
    return previous
def annot_postprocess(page: "Page", annot: "Annot") -> None:
    """Finish annotation insertion.

    Makes `page` the annotation's parent, registers the annotation in the
    page's `_annot_refs` dictionary under its id() and sets the ownership
    flag.
    """
    assert isinstance( page, Page)
    assert isinstance( annot, Annot)
    #annot.parent = weakref.proxy(page)
    annot.parent = page
    key = id(annot)
    page._annot_refs[key] = annot
    annot.thisown = True
def canon(c):
    '''
    Canonicalise a code point for text matching.

    No-break space, line/paragraph separator, CR, LF and TAB become a space;
    ASCII upper-case letters become lower-case; everything else is returned
    unchanged.
    '''
    assert isinstance(c, int)
    # TODO: proper unicode case folding
    # TODO: character equivalence (a matches ä, etc)
    space = ord(' ')
    if c in (0xA0, 0x2028, 0x2029, ord('\r'), ord('\n'), ord('\t')):
        return space
    if ord('A') <= c <= ord('Z'):
        return c + (ord('a') - ord('A'))
    return c
def chartocanon(s):
    '''
    Decode the first character of `s` with mupdf.fz_chartorune() and return
    (first result of fz_chartorune, canonicalised code point).
    '''
    assert isinstance(s, str)
    consumed, rune = mupdf.fz_chartorune(s)
    return consumed, canon(rune)
def dest_is_valid(o, page_count, page_object_nums, names_list):
    '''
    Check whether the destination referenced by PDF object `o` is valid.

    Returns 0 if `o` has a GoTo action whose /D entry is not in
    `names_list`, or if its /Dest array does not start with a known page
    object. For a string /Dest the result of string_in_names_list() is
    returned. In all other cases 1 is returned.
    '''
    action = mupdf.pdf_dict_get( o, PDF_NAME('A'))
    is_goto = mupdf.pdf_name_eq(
            mupdf.pdf_dict_get( action, PDF_NAME('S')),
            PDF_NAME('GoTo'),
            )
    if is_goto and not string_in_names_list(
            mupdf.pdf_dict_get( action, PDF_NAME('D')),
            names_list,
            ):
        return 0

    dest = mupdf.pdf_dict_get( o, PDF_NAME('Dest'))
    if not dest.m_internal:
        return 1    # no /Dest entry
    if mupdf.pdf_is_string( dest):
        return string_in_names_list( dest, names_list)
    first = mupdf.pdf_array_get( dest, 0)
    if not dest_is_valid_page( first, page_object_nums, page_count):
        return 0
    return 1
def dest_is_valid_page(obj, page_object_nums, pagecount):
    '''
    Return 1 if the object number of `obj` is non-zero and occurs among the
    first `pagecount` entries of `page_object_nums`, else 0.
    '''
    num = mupdf.pdf_to_num(obj)
    if num == 0:
        return 0
    known = any(page_object_nums[i] == num for i in range(pagecount))
    return 1 if known else 0
def find_string(s, needle):
    '''
    Search `s` for `needle` using match_string().

    Returns (start, end) of the first position where match_string()
    succeeds, with `end` being its result shifted by `start`, or
    (None, None) if no position matches.
    '''
    assert isinstance(s, str)
    for start in range(len(s)):
        matched = match_string(s[start:], needle)
        if matched is None:
            continue
        return start, start + matched
    return None, None
def get_pdf_now() -> str:
    '''
    "Now" timestamp in PDF Format

    Returns the local time as "D:YYYYMMDDhhmmss", followed by the UTC offset
    as "+hh'mm'" (east of UTC) or "-hh'mm'" (west of UTC). No offset is
    appended when local time equals UTC.

    The offset is taken from the zone rule in force right now:
    `time.altzone` while daylight saving time is active, `time.timezone`
    otherwise.
    '''
    import time

    now = time.localtime()
    tstamp = time.strftime("D:%Y%m%d%H%M%S", now)

    # Seconds *west* of UTC, hence the inverted sign below.
    if time.daylight and now.tm_isdst > 0:
        west = time.altzone
    else:
        west = time.timezone
    if west == 0:
        return tstamp

    # Split the absolute offset, so that e.g. -19800s gives 05'30' rather
    # than the floored 06'30'.
    hours, minutes = divmod(abs(west) // 60, 60)
    sign = "-" if west > 0 else "+"
    return f"{tstamp}{sign}{hours:02d}'{minutes:02d}'"
class ElementPosition(object):
    """Plain attribute container for element position information."""

    def __init__(self):
        # No attributes are predefined; they are set by whoever fills it.
        pass
def make_story_elpos():
    """Return a new, empty ElementPosition object."""
    elpos = ElementPosition()
    return elpos
def get_highlight_selection(page, start: point_like =None, stop: point_like =None, clip: rect_like =None) -> list:
    """Return rectangles of text lines between two points.

    Notes:
        The default of 'start' is top-left of 'clip'. The default of 'stop'
        is bottom-right of 'clip'.

    Args:
        start: start point_like
        stop: end point_like, must be 'below' start
        clip: consider this rect_like only, default is page rectangle
    Returns:
        List of line bbox intersections with the area established by the
        parameters.
    """
    # validate and normalize arguments
    clip = Rect(page.rect if clip is None else clip)
    # Convert to Point so that any point_like (e.g. a tuple) is accepted;
    # the code below relies on the .x / .y attributes.
    start = clip.tl if start is None else Point(start)
    stop = clip.br if stop is None else Point(stop)
    clip.y0 = start.y
    clip.y1 = stop.y
    if clip.is_empty or clip.is_infinite:
        return []

    # extract text of page, clip only, no images, expand ligatures
    blocks = page.get_text("dict", flags=0, clip=clip)["blocks"]

    lines = []  # will return this list of rectangles
    for block in blocks:
        bbox = Rect(block["bbox"])
        if bbox.is_infinite or bbox.is_empty:
            continue
        for line in block["lines"]:
            bbox = Rect(line["bbox"])
            if bbox.is_infinite or bbox.is_empty:
                continue
            lines.append(bbox)

    if not lines:  # did not select anything
        return lines

    lines.sort(key=lambda bbox: bbox.y1)  # sort by vertical positions

    # cut off prefix from first line if start point is close to its top
    first = lines.pop(0)
    if first.y0 - start.y <= 0.1 * first.height:  # close enough?
        r = Rect(start.x, first.y0, first.br)  # intersection rectangle
        if not (r.is_empty or r.is_infinite):
            lines.insert(0, r)  # insert again if not empty
    else:
        lines.insert(0, first)  # insert again

    if not lines:  # the list might have been emptied
        return lines

    # cut off suffix from last line if stop point is close to its bottom
    last = lines.pop()
    if stop.y - last.y1 <= 0.1 * last.height:  # close enough?
        r = Rect(last.tl, stop.x, last.y1)  # intersection rectangle
        if not (r.is_empty or r.is_infinite):
            lines.append(r)  # append if not empty
    else:
        lines.append(last)  # append again

    return lines
def glyph_name_to_unicode(name: str) -> int:
    """Return the code point for a Unicode character name.

    Uses unicodedata.lookup(); if that fails for any reason, 65533 (the
    replacement character U+FFFD) is returned instead.
    """
    import unicodedata
    try:
        return ord(unicodedata.lookup(name))
    except Exception:
        return 65533
def hdist(dir, a, b):
    '''Absolute value of the dot product of vector a->b with `dir`.'''
    return mupdf.fz_abs((b.x - a.x) * dir.x + (b.y - a.y) * dir.y)
def make_table(rect: rect_like =(0, 0, 1, 1), cols: int =1, rows: int =1) -> list:
    """Return a list of (rows x cols) equal sized rectangles.

    Notes:
        A utility to fill a given area with table cells of equal size.
    Args:
        rect: rect_like to use as the table area
        rows: number of rows
        cols: number of columns
    Returns:
        A list with <rows> items, where each item is a list of <cols>
        PyMuPDF Rect objects of equal sizes.
    Raises:
        ValueError: if rect is empty or infinite.
    """
    rect = Rect(rect)  # ensure this is a Rect
    if rect.is_empty or rect.is_infinite:
        raise ValueError("rect must be finite and not empty")
    tl = rect.tl

    cell_height = rect.height / rows
    cell_width = rect.width / cols
    step_right = (cell_width, 0, cell_width, 0)  # offset to the next cell in a row
    step_down = (0, cell_height, 0, cell_height)  # offset to the cell below

    # top row: start at the top-left cell and keep shifting right
    cell = Rect(tl, tl.x + cell_width, tl.y + cell_height)
    top_row = [cell]
    for _ in range(1, cols):
        cell += step_right
        top_row.append(cell)

    # every further row is the previous row shifted down
    table = [top_row]
    for index in range(1, rows):
        table.append([above + step_down for above in table[index - 1]])
    return table
def util_ensure_widget_calc(annot):
    '''
    Ensure that widgets with /AA/C JavaScript are in array AcroForm/CO

    Creates the /CO array if it is missing and appends an indirect reference
    to the annotation unless its object number is already listed.
    '''
    annot_obj = mupdf.pdf_annot_obj(annot.this)
    pdf = mupdf.pdf_get_bound_document(annot_obj)
    co_name = mupdf.pdf_new_name("CO")    # = PDF_NAME(CO)
    acroform = mupdf.pdf_dict_getl(
            mupdf.pdf_trailer(pdf),
            PDF_NAME('Root'),
            PDF_NAME('AcroForm'),
            )
    co_array = mupdf.pdf_dict_get(acroform, co_name)  # = AcroForm/CO
    if not mupdf.pdf_is_array(co_array):
        co_array = mupdf.pdf_dict_put_array(acroform, co_name, 2)

    count = mupdf.pdf_array_len(co_array)
    xref = mupdf.pdf_to_num(annot_obj)
    listed = any(
        mupdf.pdf_to_num(mupdf.pdf_array_get(co_array, index)) == xref
        for index in range(count)
    )
    if not listed:
        mupdf.pdf_array_push(co_array, mupdf.pdf_new_indirect(pdf, xref, 0))
def util_make_rect( *args, p0=None, p1=None, x0=None, y0=None, x1=None, y1=None):
    '''
    Helper for initialising rectangle classes.

    2022-09-02: This is quite different from PyMuPDF's util_make_rect(), which
    uses `goto` in ways that don't easily translate to Python.

    Returns (x0, y0, x1, y1) derived from <args>, then override with p0, p1,
    x0, y0, x1, y1 if they are not None.

    Accepts following forms for <args>:
        () returns all zeros.
        (top-left, bottom-right)
        (top-left, x1, y1)
        (x0, y0, bottom-right)
        (x0, y0, x1, y1)
        (rect)

    Where top-left and bottom-right are (x, y) or something with .x, .y
    members; rect is something with .x0, .y0, .x1, and .y1 members.

    2023-11-18: we now override with p0, p1, x0, y0, x1, y1 if not None.
    '''
    def as_xy(item):
        # (x, y) of a 2-sequence or point object, else (None, None).
        if isinstance(item, (list, tuple)) and len(item) == 2:
            return item[0], item[1]
        if isinstance(item, (Point, mupdf.FzPoint, mupdf.fz_point)):
            return item.x, item.y
        return None, None

    def flatten(item):
        # Sequence of the coordinates held by `item`; scalars are wrapped.
        if isinstance(item, tuple):
            return item
        if isinstance(item, Point):
            return item.x, item.y
        if isinstance(item, (Rect, IRect, mupdf.FzRect, mupdf.fz_rect)):
            return item.x0, item.y0, item.x1, item.y1
        if not isinstance(item, (list, tuple)):
            return (item,)
        return item

    def from_positional():
        count = len(args)
        if count == 0:
            return 0, 0, 0, 0
        if count == 1:
            only = args[0]
            if isinstance(only, (list, tuple)) and len(only) == 2:
                first, second = only
                coords = (*first, *second)
                assert len(coords) == 4
                return coords
            if isinstance(only, (list, tuple)) and len(only) == 3:
                part_a, part_b, part_c = only
                part_a = flatten(part_a)
                part_b = flatten(part_b)
                part_c = flatten(part_c)
                coords = (*part_a, *part_b, *part_c)
                assert len(coords) == 4
                return coords
            coords = flatten(only)
            assert len(coords) == 4, f'arg={only!r} ret={coords!r}'
            return coords
        if count == 2:
            coords = as_xy(args[0]) + as_xy(args[1])
            assert len(coords) == 4
            return coords
        if count == 3:
            # Either (point, x1, y1) or (x0, y0, point).
            px, py = as_xy(args[0])
            if (px, py) != (None, None):
                return px, py, args[1], args[2]
            px, py = as_xy(args[2])
            if (px, py) != (None, None):
                return args[0], args[1], px, py
        elif count == 4:
            return args[0], args[1], args[2], args[3]
        raise Exception( f'Unrecognised args: {args}')

    ret_x0, ret_y0, ret_x1, ret_y1 = from_positional()
    # Keyword arguments take precedence over the positional ones.
    if p0 is not None:
        ret_x0, ret_y0 = as_xy(p0)
    if p1 is not None:
        ret_x1, ret_y1 = as_xy(p1)
    if x0 is not None:
        ret_x0 = x0
    if y0 is not None:
        ret_y0 = y0
    if x1 is not None:
        ret_x1 = x1
    if y1 is not None:
        ret_y1 = y1
    return ret_x0, ret_y0, ret_x1, ret_y1
def util_make_irect( *args, p0=None, p1=None, x0=None, y0=None, x1=None, y1=None):
    '''
    Integer variant of util_make_rect().

    The top-left coordinates are rounded down and the bottom-right ones
    rounded up.
    '''
    left, top, right, bottom = util_make_rect( *args, p0=p0, p1=p1, x0=x0, y0=y0, x1=x1, y1=y1)
    return (
        int(math.floor(left)),
        int(math.floor(top)),
        int(math.ceil(right)),
        int(math.ceil(bottom)),
    )
def util_round_rect( rect):
    '''Round `rect` with mupdf.fz_round_rect() and return it in Python form.'''
    fz_rect = JM_rect_from_py(rect)
    return JM_py_from_irect(mupdf.fz_round_rect(fz_rect))
def util_transform_rect( rect, matrix):
    '''
    Transform `rect` by `matrix`; delegates to the `extra` module when
    g_use_extra is set.
    '''
    if g_use_extra:
        return extra.util_transform_rect( rect, matrix)
    fz_rect = JM_rect_from_py(rect)
    fz_matrix = JM_matrix_from_py(matrix)
    return JM_py_from_rect(mupdf.fz_transform_rect(fz_rect, fz_matrix))
def util_intersect_rect( r1, r2):
    '''Intersection of two rectangles via mupdf.fz_intersect_rect().'''
    first = JM_rect_from_py(r1)
    second = JM_rect_from_py(r2)
    return JM_py_from_rect(mupdf.fz_intersect_rect(first, second))
def util_is_point_in_rect( p, r):
    '''Result of mupdf.fz_is_point_inside_rect() for point `p` and rect `r`.'''
    point = JM_point_from_py(p)
    rect = JM_rect_from_py(r)
    return mupdf.fz_is_point_inside_rect(point, rect)
def util_include_point_in_rect( r, p):
    '''Extend rect `r` by point `p` via mupdf.fz_include_point_in_rect().'''
    rect = JM_rect_from_py(r)
    point = JM_point_from_py(p)
    return JM_py_from_rect(mupdf.fz_include_point_in_rect(rect, point))
def util_point_in_quad( P, Q):
    '''Result of mupdf.fz_is_point_inside_quad() for point `P` and quad `Q`.'''
    point = JM_point_from_py(P)
    quad = JM_quad_from_py(Q)
    return mupdf.fz_is_point_inside_quad(point, quad)
def util_transform_point( point, matrix):
    '''Transform `point` by `matrix` via mupdf.fz_transform_point().'''
    fz_point = JM_point_from_py(point)
    fz_matrix = JM_matrix_from_py(matrix)
    return JM_py_from_point(mupdf.fz_transform_point(fz_point, fz_matrix))
def util_union_rect( r1, r2):
    '''Union of two rectangles via mupdf.fz_union_rect().'''
    first = JM_rect_from_py(r1)
    second = JM_rect_from_py(r2)
    return JM_py_from_rect(mupdf.fz_union_rect(first, second))
def util_concat_matrix( m1, m2):
    '''Concatenate two matrices via mupdf.fz_concat().'''
    left = JM_matrix_from_py(m1)
    right = JM_matrix_from_py(m2)
    return JM_py_from_matrix(mupdf.fz_concat(left, right))
def util_invert_matrix(matrix):
    '''
    Invert a matrix in Python.

    Returns:
        (0, (a, b, c, d, e, f)) with the inverse matrix, or (1, ()) if the
        determinant lies within +/- sys.float_info.epsilon of zero, i.e. the
        matrix is treated as not invertible.
    '''
    src = JM_matrix_from_py(matrix)
    a = src.a
    det = a * src.d - src.b * src.c
    if not (det < -sys.float_info.epsilon or det > sys.float_info.epsilon):
        return 1, ()
    # Results are stored in an FzMatrix, as before, so the returned values
    # go through the same attribute storage.
    dst = mupdf.FzMatrix()
    rdet = 1 / det
    dst.a = src.d * rdet
    dst.b = -src.b * rdet
    dst.c = -src.c * rdet
    dst.d = a * rdet
    # Compute e into a temporary first, mirroring the original order.
    a = -src.e * dst.a - src.f * dst.c
    dst.f = -src.e * dst.b - src.f * dst.d
    dst.e = a
    return 0, (dst.a, dst.b, dst.c, dst.d, dst.e, dst.f)
def util_measure_string( text, fontname, fontsize, encoding):
    '''
    Return the width of `text` for a Base-14 font at `fontsize`.

    Each character is mapped to the code page selected by `encoding`
    (ISO 8859-7 for Greek, Windows-1251 for Cyrillic, Windows-1252
    otherwise); characters that cannot be mapped are measured as 0xB7. The
    glyph advances are summed and multiplied by `fontsize`.

    The text is walked code point by code point. The earlier version moved
    a str index by the value returned from fz_chartorune(), which
    presumably is a UTF-8 byte count and would skip the characters
    following a non-ASCII one - TODO confirm against the MuPDF binding.
    '''
    font = mupdf.fz_new_base14_font(fontname)
    if encoding == mupdf.PDF_SIMPLE_ENCODING_GREEK:
        to_codepage = mupdf.fz_iso8859_7_from_unicode
    elif encoding == mupdf.PDF_SIMPLE_ENCODING_CYRILLIC:
        to_codepage = mupdf.fz_windows_1251_from_unicode
    else:
        to_codepage = mupdf.fz_windows_1252_from_unicode

    w = 0
    for char in text:
        c = to_codepage(ord(char))
        if c < 0:
            c = 0xB7    # not representable in the code page
        g = mupdf.fz_encode_character(font, c)
        w += mupdf.fz_advance_glyph(font, g, 0)
    return w * fontsize
def util_sine_between(C, P, Q):
    '''
    For points C, P, Q compute the sine between lines CP and QP.

    C is moved so that P becomes the origin, transformed with the matrix
    built from the unit vector of P->Q, and normalised; the y component of
    the result is returned.
    '''
    c = JM_point_from_py(C)
    p = JM_point_from_py(P)
    q = JM_point_from_py(Q)
    unit = mupdf.fz_normalize_vector(mupdf.fz_make_point(q.x - p.x, q.y - p.y))
    shift = mupdf.fz_make_matrix(1, 0, 0, 1, -p.x, -p.y)
    turn = mupdf.fz_make_matrix(unit.x, -unit.y, unit.y, unit.x, 0, 0)
    moved = mupdf.fz_transform_point(c, mupdf.fz_concat(shift, turn))
    return mupdf.fz_normalize_vector(moved).y
def util_hor_matrix(C, P):
    '''
    Return the matrix that maps two points C, P to the x-axis such that
    C -> (0,0) and the image of P have the same distance.

    It is the concatenation of a shift by -C and the matrix built from the
    unit vector of C->P.
    '''
    c = JM_point_from_py(C)
    p = JM_point_from_py(P)
    # (cosine, sine) of vector P-C
    unit = mupdf.fz_normalize_vector(mupdf.fz_make_point(p.x - c.x, p.y - c.y))
    shift = mupdf.fz_make_matrix(1, 0, 0, 1, -c.x, -c.y)
    turn = mupdf.fz_make_matrix(unit.x, -unit.y, unit.y, unit.x, 0, 0)
    return JM_py_from_matrix(mupdf.fz_concat(shift, turn))
def match_string(h0, n0):
    '''
    Match needle `n0` against the start of haystack `h0`.

    Characters are compared after canonicalisation by chartocanon(); a run
    of spaces on either side counts as a single space. Returns the haystack
    offset reached by the match if the needle was consumed completely
    (its next canonical character is 0), otherwise None.

    NOTE(review): offsets are advanced by the first value chartocanon()
    returns and then used as str indices; whether that value is a character
    or a byte count is not visible here - confirm for non-ASCII text.
    '''
    space = ord(' ')

    def advance(text, pos, current):
        # Read the next canonical character; after a space, keep reading
        # until a non-space character has been found.
        if current == space:
            while True:
                step, current = chartocanon(text[pos:])
                pos += step
                if current != space:
                    break
        else:
            step, current = chartocanon(text[pos:])
            pos += step
        return pos, current

    h_pos = 0
    n_pos = 0
    end = h_pos
    step, h_char = chartocanon(h0[h_pos:])
    h_pos += step
    step, n_char = chartocanon(n0[n_pos:])
    n_pos += step
    while h_char == n_char:
        end = h_pos
        h_pos, h_char = advance(h0, h_pos, h_char)
        n_pos, n_char = advance(n0, n_pos, n_char)
    return end if n_char == 0 else None
def on_highlight_char(hits, line, ch):
    '''
    Add the quad of character `ch` to the highlight result `hits`.

    If the character directly continues the last stored quad (horizontal
    and vertical distances of the adjoining corners are below the fuzz
    limits scaled by the character size), that quad is extended to the
    right; otherwise the character's quad is appended as a new entry.
    '''
    assert hits
    assert isinstance(line, mupdf.FzStextLine)
    assert isinstance(ch, mupdf.FzStextChar)
    vfuzz = ch.m_internal.size * hits.vfuzz
    hfuzz = ch.m_internal.size * hits.hfuzz
    ch_quad = JM_char_quad(line, ch)
    if hits.len > 0:
        # fixme: end = hits.quads[-1]
        end = JM_quad_from_py(hits.quads[hits.len - 1])
        direction = line.m_internal.dir
        continues_last = (
            hdist(direction, end.lr, ch_quad.ll) < hfuzz
            and vdist(direction, end.lr, ch_quad.ll) < vfuzz
            and hdist(direction, end.ur, ch_quad.ul) < hfuzz
            and vdist(direction, end.ur, ch_quad.ul) < vfuzz
        )
        if continues_last:
            end.ur = ch_quad.ur
            end.lr = ch_quad.lr
            # NOTE(review): this only holds if JM_quad_from_py() handed back
            # the stored object itself, so that the update above is visible
            # in hits.quads - confirm.
            assert hits.quads[-1] == end
            return
    hits.quads.append(ch_quad)
    hits.len += 1
def page_merge(doc_des, doc_src, page_from, page_to, rotate, links, copy_annots, graft_map):
    '''
    Deep-copies a source page to the target.
    Modified version of function of pdfmerge.c: we also copy annotations, but
    we skip some subtypes. In addition we rotate output.

    Delegates to extra.page_merge() when g_use_extra is set.
    '''
    if g_use_extra:
        return extra.page_merge( doc_des, doc_src, page_from, page_to, rotate, links, copy_annots, graft_map)

    # page-level entries we want to copy
    copied_keys = tuple(
        PDF_NAME(key)
        for key in (
            'Contents',
            'Resources',
            'MediaBox',
            'CropBox',
            'BleedBox',
            'TrimBox',
            'ArtBox',
            'Rotate',
            'UserUnit',
        )
    )
    page_ref = mupdf.pdf_lookup_page_obj(doc_src, page_from)

    # make new page dict in dest doc
    page_dict = mupdf.pdf_new_dict(doc_des, 4)
    mupdf.pdf_dict_put(page_dict, PDF_NAME('Type'), PDF_NAME('Page'))

    # copy objects of source page into it
    for key in copied_keys:
        obj = mupdf.pdf_dict_get_inheritable( page_ref, key)
        if obj.m_internal:
            mupdf.pdf_dict_put( page_dict, key, mupdf.pdf_graft_mapped_object(graft_map.this, obj))

    # Copy annotations, but skip Link, Popup, IRT, Widget types.
    # Dict keys P (parent) and Popup are removed from copied annotations.
    if copy_annots:
        old_annots = mupdf.pdf_dict_get( page_ref, PDF_NAME('Annots'))
        n = mupdf.pdf_array_len( old_annots)
        if n > 0:
            new_annots = mupdf.pdf_dict_put_array( page_dict, PDF_NAME('Annots'), n)
            skipped_subtypes = tuple(PDF_NAME(name) for name in ('Link', 'Popup', 'Widget'))
            for index in range(n):
                o = mupdf.pdf_array_get( old_annots, index)
                if not o.m_internal or not mupdf.pdf_is_dict(o):
                    continue    # skip non-dict items
                if mupdf.pdf_dict_gets( o, "IRT").m_internal:
                    continue
                subtype = mupdf.pdf_dict_get( o, PDF_NAME('Subtype'))
                if any(mupdf.pdf_name_eq( subtype, name) for name in skipped_subtypes):
                    continue
                mupdf.pdf_dict_del( o, PDF_NAME('Popup'))
                mupdf.pdf_dict_del( o, PDF_NAME('P'))
                copy_o = mupdf.pdf_graft_mapped_object( graft_map.this, o)
                annot = mupdf.pdf_new_indirect( doc_des, mupdf.pdf_to_num( copy_o), 0)
                mupdf.pdf_array_push( new_annots, annot)

    # rotate the page
    if rotate != -1:
        mupdf.pdf_dict_put_int( page_dict, PDF_NAME('Rotate'), rotate)
    # Now add the page dictionary to dest PDF
    ref = mupdf.pdf_add_object( doc_des, page_dict)

    # Insert new page at specified location
    mupdf.pdf_insert_page( doc_des, page_to, ref)
def paper_rect(s: str) -> Rect:
    """Return a Rect with origin (0, 0) for the paper format named by 's'.

    's' is passed unchanged to `paper_size()`, so it must be a string that
    function understands (e.g. 'a4' or 'A4-L').
    """
    dims = paper_size(s)
    width, height = dims
    return Rect(0.0, 0.0, width, height)
def paper_size(s: str) -> tuple:
    """Return (width, height) for a paper format string.

    The lookup is case-insensitive. A trailing '-L' selects landscape, i.e.
    the swapped tuple: 'A4-L' gives (842, 595). A trailing '-P' or no suffix
    gives the portrait tuple. Unknown formats give (-1, -1).
    """
    name = s.lower()
    landscape = name.endswith("-l")
    if landscape:
        name = name[:-2]
    # Checked independently of the landscape suffix, as before.
    if name.endswith("-p"):
        name = name[:-2]
    dims = paper_sizes().get(name, (-1, -1))
    if landscape:
        return (dims[1], dims[0])
    return dims
def paper_sizes():
    """Return a new dictionary of known paper formats @ 72 dpi.

    Keys are lower-case format strings such as "a4" for ISO-A4; values are
    (width, height) tuples.

    Information taken from the following web sites:
    www.din-formate.de
    www.din-formate.info/amerikanische-formate.html
    www.directtools.de/wissen/normen/iso.htm
    """
    table = (
        ("a0", (2384, 3370)),
        ("a1", (1684, 2384)),
        ("a10", (74, 105)),
        ("a2", (1191, 1684)),
        ("a3", (842, 1191)),
        ("a4", (595, 842)),
        ("a5", (420, 595)),
        ("a6", (298, 420)),
        ("a7", (210, 298)),
        ("a8", (147, 210)),
        ("a9", (105, 147)),
        ("b0", (2835, 4008)),
        ("b1", (2004, 2835)),
        ("b10", (88, 125)),
        ("b2", (1417, 2004)),
        ("b3", (1001, 1417)),
        ("b4", (709, 1001)),
        ("b5", (499, 709)),
        ("b6", (354, 499)),
        ("b7", (249, 354)),
        ("b8", (176, 249)),
        ("b9", (125, 176)),
        ("c0", (2599, 3677)),
        ("c1", (1837, 2599)),
        ("c10", (79, 113)),
        ("c2", (1298, 1837)),
        ("c3", (918, 1298)),
        ("c4", (649, 918)),
        ("c5", (459, 649)),
        ("c6", (323, 459)),
        ("c7", (230, 323)),
        ("c8", (162, 230)),
        ("c9", (113, 162)),
        ("card-4x6", (288, 432)),
        ("card-5x7", (360, 504)),
        ("commercial", (297, 684)),
        ("executive", (522, 756)),
        ("invoice", (396, 612)),
        ("ledger", (792, 1224)),
        ("legal", (612, 1008)),
        ("legal-13", (612, 936)),
        ("letter", (612, 792)),
        ("monarch", (279, 540)),
        ("tabloid-extra", (864, 1296)),
    )
    return dict(table)
def pdf_lookup_page_loc(doc, needle):
    '''
    Thin wrapper: forwards both arguments to `mupdf.pdf_lookup_page_loc()`
    and returns its result unchanged.
    '''
    location = mupdf.pdf_lookup_page_loc(doc, needle)
    return location
def pdfobj_string(o, prefix=''):
    '''
    Returns description of mupdf.PdfObj (wrapper for pdf_obj) <o>.

    The function is deliberately disabled by the assertion on its first
    line, which points at `mupdf.pdf_debug_obj()` as the likely replacement.
    The code after it only runs when assertions are stripped (`python -O`).

    Args:
        o: the object to describe.
        prefix: indentation prepended to dictionary entries; grows by four
            spaces for each nesting level.
    Returns:
        A multi-line string.
    '''
    assert 0, 'use mupdf.pdf_debug_obj() ?'
    ret = ''
    if mupdf.pdf_is_array(o):
        l = mupdf.pdf_array_len(o)
        ret += f'array {l}\n'
        for i in range(l):
            oo = mupdf.pdf_array_get(o, i)
            ret += pdfobj_string(oo, prefix + '    ')
            ret += '\n'
    elif mupdf.pdf_is_bool(o):
        ret += f'bool: {mupdf.pdf_to_bool(o)}\n'
    elif mupdf.pdf_is_dict(o):
        l = mupdf.pdf_dict_len(o)
        ret += f'dict {l}\n'
        for i in range(l):
            key = mupdf.pdf_dict_get_key(o, i)
            value = mupdf.pdf_dict_get(o, key)
            ret += f'{prefix} {key}: '
            ret += pdfobj_string(value, prefix + '    ')
            ret += '\n'
    elif mupdf.pdf_is_embedded_file(o):
        # NOTE(review): `embedded_file_name()` is kept from the original; it
        # is unclear whether the wrapper class offers it - confirm.
        ret += f'embedded_file: {o.embedded_file_name()}\n'
    elif mupdf.pdf_is_indirect(o):
        ret += 'indirect: ...\n'
    elif mupdf.pdf_is_int(o):
        ret += f'int: {mupdf.pdf_to_int(o)}\n'
    elif mupdf.pdf_is_jpx_image(o):
        ret += 'jpx_image:\n'
    elif mupdf.pdf_is_name(o):
        ret += f'name: {mupdf.pdf_to_name(o)}\n'
    elif mupdf.pdf_is_null(o):
        # Previously `o.pdf_is_null` (no call) was tested, which made every
        # branch below unreachable.
        ret += 'null\n'
    elif mupdf.pdf_is_real(o):
        ret += f'real: {mupdf.pdf_to_real(o)}\n'
    elif mupdf.pdf_is_stream(o):
        ret += 'stream\n'
    elif mupdf.pdf_is_string(o):
        ret += f'string: {mupdf.pdf_to_string(o)}\n'
    else:
        ret += '<>\n'
    return ret
def repair_mono_font(page: "Page", font: "Font") -> None:
    """Repair character spacing for mono fonts.

    Notes:
        Some mono-spaced fonts are displayed with a too large character
        distance, e.g. "a b c" instead of "abc". This utility adds an entry
        "/W[0 65535 w]" to the descendent font(s) of font. The value w is
        derived from the glyph advance of 0x20 (space), scaled by 1000 and
        rounded to an integer.
        This should enforce viewers to use 'w' as the character width.
    Args:
        page: pymupdf.Page object.
        font: pymupdf.Font object.
    """
    if not font.flags["mono"]:
        # Font is not flagged as monospaced: nothing to do.
        return None
    doc = page.parent
    # xrefs of the page's font entries that refer to our font; a set drops
    # duplicates.
    matching = {
        entry[0]
        for entry in page.get_fonts()
        if (entry[3] == font.name and entry[4].startswith("F") and entry[5].startswith("Identity"))
    }
    if not matching:
        # Our font does not occur on this page.
        return
    width = int(round(font.glyph_advance(32) * 1000))
    for xref in matching:
        updated = TOOLS.set_font_width(doc, xref, width)
        if not updated:
            log("Cannot set width for '%s' in xref %i" % (font.name, xref))
def sRGB_to_pdf(srgb: int) -> tuple:
    """Convert an sRGB color code to a PDF color triple.

    There is **no error checking** for performance reasons!

    Args:
        srgb: (int) RRGGBB (red, green, blue); converted to integer
            components by `sRGB_to_rgb()`.
    Returns:
        Tuple (red, green, blue), each component divided by 255.0.
    """
    red, green, blue = sRGB_to_rgb(srgb)
    return red / 255.0, green / 255.0, blue / 255.0
def sRGB_to_rgb(srgb: int) -> tuple:
    """Convert an sRGB color code to an RGB color triple.

    There is **no error checking** for performance reasons!

    Args:
        srgb: (int) SSRRGGBB (red, green, blue). Only the low 24 bits are
            used; the `SS` byte is discarded.
            With MuPDF < 1.26, `s` is always 0.
    Returns:
        Tuple (red, green, blue) each item in interval 0 <= item <= 255.
    """
    rgb24 = srgb & 0xffffff     # discard everything above the RRGGBB part
    red = (rgb24 >> 16) & 0xff
    green = (rgb24 >> 8) & 0xff
    blue = rgb24 & 0xff
    return (red, green, blue)
def string_in_names_list(p, names_list):
    '''
    Return 1 if the text string of <p> equals the text string of any
    even-indexed item of array <names_list>, else 0. A false <names_list>
    is treated as empty.
    '''
    item_count = mupdf.pdf_array_len(names_list) if names_list else 0
    wanted = mupdf.pdf_to_text_string(p)
    # Only every second entry (index 0, 2, 4, ...) is compared.
    found = any(
        mupdf.pdf_to_text_string(mupdf.pdf_array_get(names_list, index)) == wanted
        for index in range(0, item_count, 2)
    )
    return 1 if found else 0
def strip_outline(doc, outlines, page_count, page_object_nums, names_list):
    '''
    Walk a chain of sibling outline items (linked via /Next), dropping items
    whose destination is invalid.

    For each item its children are stripped first via `strip_outlines()`.
    An item with an invalid destination is unlinked from the chain if it has
    no remaining children; otherwise only its /Dest and /A entries are
    removed.

    Args:
        doc: passed through to `strip_outlines()`.
        outlines: first item of the sibling chain.
        page_count, page_object_nums, names_list: passed through to
            `dest_is_valid()` and `strip_outlines()`.

    Returns (count, first, prev): the number of kept items, the first kept
    item and the last kept item. `first` and `prev` are None if no item was
    kept.
    '''
    first = None
    count = 0
    current = outlines
    prev = None     # last kept item; stays None until one has been kept
    while current.m_internal:
        # Strip any children to start with. This takes care of
        # First / Last / Count for us.
        nc = strip_outlines(doc, current, page_count, page_object_nums, names_list)
        if not dest_is_valid(current, page_count, page_object_nums, names_list):
            if nc == 0:
                # Outline with invalid dest and no children. Drop it by
                # pulling the next one in here.
                have_prev = prev is not None and prev.m_internal
                next_item = mupdf.pdf_dict_get(current, PDF_NAME('Next'))
                if not next_item.m_internal:
                    # There is no next one to pull in
                    if have_prev:
                        mupdf.pdf_dict_del(prev, PDF_NAME('Next'))
                elif have_prev:
                    mupdf.pdf_dict_put(prev, PDF_NAME('Next'), next_item)
                    mupdf.pdf_dict_put(next_item, PDF_NAME('Prev'), prev)
                else:
                    mupdf.pdf_dict_del(next_item, PDF_NAME('Prev'))
                current = next_item
            else:
                # Outline with invalid dest, but children. Just drop the dest.
                mupdf.pdf_dict_del(current, PDF_NAME('Dest'))
                mupdf.pdf_dict_del(current, PDF_NAME('A'))
                current = mupdf.pdf_dict_get(current, PDF_NAME('Next'))
        else:
            # Keep this one
            if not first or not first.m_internal:
                first = current
            prev = current
            current = mupdf.pdf_dict_get(current, PDF_NAME('Next'))
            count += 1
    return count, first, prev
def strip_outlines(doc, outlines, page_count, page_object_nums, names_list):
    '''
    Strip the children of outline node <outlines> and update its /First,
    /Last and /Count entries to match.

    Returns the number of direct children that were kept (0 if <outlines>
    is a null object or has no /First).
    '''
    if not outlines.m_internal:
        return 0

    first = mupdf.pdf_dict_get(outlines, PDF_NAME('First'))
    kept = 0
    if first.m_internal:
        kept, first, last = strip_outline(doc, first, page_count, page_object_nums, names_list)

    if kept == 0:
        # No children left: remove all child bookkeeping.
        for key in ('First', 'Last', 'Count'):
            mupdf.pdf_dict_del(outlines, PDF_NAME(key))
        return kept

    # Keep the sign of the previous /Count.
    previous_count = mupdf.pdf_to_int(mupdf.pdf_dict_get(outlines, PDF_NAME('Count')))
    signed_count = kept if previous_count > 0 else -kept
    mupdf.pdf_dict_put(outlines, PDF_NAME('First'), first)
    mupdf.pdf_dict_put(outlines, PDF_NAME('Last'), last)
    mupdf.pdf_dict_put(outlines, PDF_NAME('Count'), mupdf.pdf_new_int(signed_count))
    return kept
# Integer codes 1..4 for the path operation kinds; presumably used by the
# trace device code elsewhere in this file.
(
    trace_device_FILL_PATH,
    trace_device_STROKE_PATH,
    trace_device_CLIP_PATH,
    trace_device_CLIP_STROKE_PATH,
) = range(1, 5)
def unicode_to_glyph_name(ch: int) -> str:
    """
    Return the `unicodedata` name of code point 'ch', or ".notdef" if the
    lookup raises ValueError.
    """
    import unicodedata

    try:
        return unicodedata.name(chr(ch))
    except ValueError:
        return ".notdef"
def vdist(dir, a, b):
    '''
    Return `fz_abs(dx * dir.y + dy * dir.x)` where (dx, dy) is the vector
    from point <a> to point <b>.
    '''
    delta_x = b.x - a.x
    delta_y = b.y - a.y
    combined = delta_x * dir.y + delta_y * dir.x
    return mupdf.fz_abs(combined)
def apply_pages(
        path,
        pagefn,
        *,
        pagefn_args=(),
        pagefn_kwargs=dict(),
        initfn=None,
        initfn_args=(),
        initfn_kwargs=dict(),
        pages=None,
        method='single',
        concurrency=None,
        _stats=False,
        ):
    '''
    Returns list of results from `pagefn()`, optionally using concurrency for
    speed.

    Args:
        path:
            Path of document.
        pagefn:
            Function to call for each page; is passed (page, *pagefn_args,
            **pagefn_kwargs). Return value is added to list that we return. If
            `method` is not 'single', must be a top-level function - nested
            functions don't work with concurrency.
        pagefn_args
        pagefn_kwargs:
            Additional args to pass to `pagefn`. Must be picklable.
        initfn:
            If true, called once in each worker process; is passed
            (*initfn_args, **initfn_kwargs). With method 'single' it is
            called once in the current process.
        initfn_args
        initfn_kwargs:
            Args to pass to initfn. Must be picklable.
        pages:
            List of page numbers to process, or None to include all pages.
        method:
            'single'
                Do not use concurrency.
            'mp'
                Operate concurrently using Python's `multiprocessing` module.
            'fork'
                Operate concurrently using custom implementation with
                `os.fork()`. Does not work on Windows.
        concurrency:
            Number of worker processes to use when operating concurrently. If
            None, we use the number of available CPUs.
        _stats:
            Internal, may change or be removed. If true, we output simple
            timing diagnostics.

    Note: We require a file path rather than a Document, because Document
    instances do not work properly after a fork - internal file descriptor
    offsets are shared between the parent and child processes.

    The default dicts are never modified here, so sharing them between
    calls is harmless.
    '''
    if _stats:
        t0 = time.time()

    if method == 'single':
        if initfn:
            initfn(*initfn_args, **initfn_kwargs)
        ret = list()
        document = Document(path)
        if pages is None:
            pages = range(len(document))
        for pno in pages:
            page = document[pno]
            # Fix: pass the page function's own keyword arguments. This used
            # to pass `initfn_kwargs`, silently dropping `pagefn_kwargs`.
            r = pagefn(page, *pagefn_args, **pagefn_kwargs)
            ret.append(r)

    else:
        # Use concurrency.
        #
        from . import _apply_pages

        if pages is None:
            # Open the document briefly just to count its pages.
            if _stats:
                t = time.time()
            with Document(path) as document:
                num_pages = len(document)
                pages = list(range(num_pages))
            if _stats:
                t = time.time() - t
                log(f'{t:.2f}s: count pages.')

        if _stats:
            t = time.time()

        if method == 'mp':
            ret = _apply_pages._multiprocessing(
                    path,
                    pages,
                    pagefn,
                    pagefn_args,
                    pagefn_kwargs,
                    initfn,
                    initfn_args,
                    initfn_kwargs,
                    concurrency,
                    _stats,
                    )

        elif method == 'fork':
            ret = _apply_pages._fork(
                    path,
                    pages,
                    pagefn,
                    pagefn_args,
                    pagefn_kwargs,
                    initfn,
                    initfn_args,
                    initfn_kwargs,
                    concurrency,
                    _stats,
                    )

        else:
            assert 0, f'Unrecognised {method=}.'

        if _stats:
            t = time.time() - t
            log(f'{t:.2f}s: work.')

    if _stats:
        t = time.time() - t0
        log(f'{t:.2f}s: total.')
    return ret
def get_text(
        path,
        *,
        pages=None,
        method='single',
        concurrency=None,
        option='text',
        clip=None,
        flags=None,
        textpage=None,
        sort=False,
        delimiters=None,
        _stats=False,
        ):
    '''
    Returns list of results from `Page.get_text()`, optionally using
    concurrency for speed. This is `apply_pages()` with `Page.get_text` as
    the page function.

    Args:
        path:
            Path of document.
        pages:
            List of page numbers to process, or None to include all pages.
        method:
            'single'
                Do not use concurrency.
            'mp'
                Operate concurrently using Python's `multiprocessing` module.
            'fork'
                Operate concurrently using custom implementation with
                `os.fork`. Does not work on Windows.
        concurrency:
            Number of worker processes to use when operating concurrently. If
            None, we use the number of available CPUs.
        option
        clip
        flags
        textpage
        sort
        delimiters:
            Handed to `apply_pages()` as keyword arguments for
            `Page.get_text()`.
        _stats:
            Forwarded to `apply_pages()`.
    '''
    text_kwargs = {
        'option': option,
        'clip': clip,
        'flags': flags,
        'textpage': textpage,
        'sort': sort,
        'delimiters': delimiters,
    }
    results = apply_pages(
            path,
            Page.get_text,
            pagefn_kwargs=text_kwargs,
            pages=pages,
            method=method,
            concurrency=concurrency,
            _stats=_stats,
            )
    return results
class TOOLS:
    '''
    Collection of utility functions, grouped in a class used as a namespace.

    We use @staticmethod to avoid the need to create an instance of this class.
    '''

    @staticmethod
    def _derotate_matrix(page):
        '''
        Return the derotation matrix of <page> in Python form; the default
        `mupdf.FzMatrix()` is used if <page> is not a `mupdf.PdfPage`.
        '''
        if isinstance(page, mupdf.PdfPage):
            return JM_py_from_matrix(JM_derotate_page_matrix(page))
        else:
            return JM_py_from_matrix(mupdf.FzMatrix())

    @staticmethod
    def _fill_widget(annot, widget):
        '''
        Populate <widget> from <annot>: widget properties, rect, xref,
        parent and a back-reference to the annotation. Empty script
        attributes are normalised to None.

        Returns the result of `JM_get_widget_properties()`.
        '''
        val = JM_get_widget_properties(annot, widget)

        widget.rect = Rect(annot.rect)
        widget.xref = annot.xref
        widget.parent = annot.parent
        widget._annot = annot  # backpointer to annot object
        # Replace any false script value (e.g. an empty string) by None.
        for attr in (
                "script",
                "script_stroke",
                "script_format",
                "script_change",
                "script_calc",
                "script_blur",
                "script_focus",
                ):
            if not getattr(widget, attr):
                setattr(widget, attr, None)
        return val

    @staticmethod
    def _get_all_contents(page):
        '''
        Return the /Contents of <page> as a bytes-like object built by
        `JM_BinFromBuffer()`.
        '''
        page = _as_pdf_page(page.this)
        res = JM_read_contents(page.obj())
        result = JM_BinFromBuffer(res)
        return result

    @staticmethod
    def _insert_contents(page, newcont, overlay=1):
        """Add bytes as a new /Contents object for a page, and return its xref."""
        pdfpage = _as_pdf_page(page, required=1)
        contbuf = JM_BufferFromBytes(newcont)
        xref = JM_insert_contents(pdfpage.doc(), pdfpage.obj(), contbuf, overlay)
        #fixme: pdfpage->doc->dirty = 1;
        return xref

    @staticmethod
    def _le_annot_parms(annot, p1, p2, fill_color):
        """Get common parameters for making annot line end symbols.

        Args:
            annot: annotation providing border width, colors and opacity.
            p1, p2: start and end point of the line.
            fill_color: fill color overriding the annotation's, if true.

        Returns:
            m: matrix that maps p1, p2 to points L, R on the x-axis
            im: its inverse
            L, R: transformed p1, p2
            w: line width
            scol: stroke color string
            fcol: fill color string
            opacity: opacity string (gs command)
        """
        w = annot.border["width"]  # line width
        sc = annot.colors["stroke"]  # stroke color
        if not sc:  # black if missing
            sc = (0,0,0)
        scol = " ".join(map(str, sc)) + " RG\n"
        if fill_color:
            fc = fill_color
        else:
            fc = annot.colors["fill"]  # fill color
        if not fc:
            fc = (1,1,1)  # white if missing
        fcol = " ".join(map(str, fc)) + " rg\n"
        m = Matrix(util_hor_matrix(p1, p2))  # matrix makes the line horizontal
        im = ~m                            # inverted matrix
        L = p1 * m                         # converted start (left) point
        R = p2 * m                         # converted end (right) point
        if 0 <= annot.opacity < 1:
            opacity = "/H gs\n"
        else:
            opacity = ""
        return m, im, L, R, w, scol, fcol, opacity

    @staticmethod
    def _le_butt(annot, p1, p2, lr, fill_color):
        """Make stream commands for butt line end symbol. "lr" denotes left (False) or right point.
        """
        m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
        shift = 3
        d = shift * max(1, w)
        M = R if lr else L
        top = (M + (0, -d/2.)) * im
        bot = (M + (0, d/2.)) * im
        ap = "\nq\n%s%f %f m\n" % (opacity, top.x, top.y)
        ap += "%f %f l\n" % (bot.x, bot.y)
        ap += _format_g(w) + " w\n"
        ap += scol + "s\nQ\n"
        return ap

    @staticmethod
    def _le_circle(annot, p1, p2, lr, fill_color):
        """Make stream commands for circle line end symbol. "lr" denotes left (False) or right point.
        """
        m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
        shift = 2.5             # 2*shift*width = length of square edge
        d = shift * max(1, w)
        M = R - (d/2., 0) if lr else L + (d/2., 0)
        r = Rect(M, M) + (-d, -d, d, d)         # the square
        ap = "q\n" + opacity + TOOLS._oval_string(r.tl * im, r.tr * im, r.br * im, r.bl * im)
        ap += _format_g(w) + " w\n"
        ap += scol + fcol + "b\nQ\n"
        return ap

    @staticmethod
    def _le_closedarrow(annot, p1, p2, lr, fill_color):
        """Make stream commands for closed arrow line end symbol. "lr" denotes left (False) or right point.
        """
        m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
        shift = 2.5
        d = shift * max(1, w)
        p2 = R + (d/2., 0) if lr else L - (d/2., 0)
        p1 = p2 + (-2*d, -d) if lr else p2 + (2*d, -d)
        p3 = p2 + (-2*d, d) if lr else p2 + (2*d, d)
        p1 *= im
        p2 *= im
        p3 *= im
        ap = "\nq\n%s%f %f m\n" % (opacity, p1.x, p1.y)
        ap += "%f %f l\n" % (p2.x, p2.y)
        ap += "%f %f l\n" % (p3.x, p3.y)
        ap += _format_g(w) + " w\n"
        ap += scol + fcol + "b\nQ\n"
        return ap

    @staticmethod
    def _le_diamond(annot, p1, p2, lr, fill_color):
        """Make stream commands for diamond line end symbol. "lr" denotes left (False) or right point.
        """
        m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
        shift = 2.5             # 2*shift*width = length of square edge
        d = shift * max(1, w)
        M = R - (d/2., 0) if lr else L + (d/2., 0)
        r = Rect(M, M) + (-d, -d, d, d)         # the square
        # the square makes line longer by (2*shift - 1)*width
        # The diamond's corners are the mid points of the square's sides.
        p = (r.tl + (r.bl - r.tl) * 0.5) * im
        ap = "q\n%s%f %f m\n" % (opacity, p.x, p.y)
        p = (r.tl + (r.tr - r.tl) * 0.5) * im
        ap += "%f %f l\n"   % (p.x, p.y)
        p = (r.tr + (r.br - r.tr) * 0.5) * im
        ap += "%f %f l\n"   % (p.x, p.y)
        p = (r.br + (r.bl - r.br) * 0.5) * im
        ap += "%f %f l\n"   % (p.x, p.y)
        ap += _format_g(w) + " w\n"
        ap += scol + fcol + "b\nQ\n"
        return ap

    @staticmethod
    def _le_openarrow(annot, p1, p2, lr, fill_color):
        """Make stream commands for open arrow line end symbol. "lr" denotes left (False) or right point.
        """
        m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
        shift = 2.5
        d = shift * max(1, w)
        p2 = R + (d/2., 0) if lr else L - (d/2., 0)
        p1 = p2 + (-2*d, -d) if lr else p2 + (2*d, -d)
        p3 = p2 + (-2*d, d) if lr else p2 + (2*d, d)
        p1 *= im
        p2 *= im
        p3 *= im
        ap = "\nq\n%s%f %f m\n" % (opacity, p1.x, p1.y)
        ap += "%f %f l\n" % (p2.x, p2.y)
        ap += "%f %f l\n" % (p3.x, p3.y)
        ap += _format_g(w) + " w\n"
        ap += scol + "S\nQ\n"
        return ap

    @staticmethod
    def _le_rclosedarrow(annot, p1, p2, lr, fill_color):
        """Make stream commands for right closed arrow line end symbol. "lr" denotes left (False) or right point.
        """
        m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
        shift = 2.5
        d = shift * max(1, w)
        p2 = R - (2*d, 0) if lr else L + (2*d, 0)
        p1 = p2 + (2*d, -d) if lr else p2 + (-2*d, -d)
        p3 = p2 + (2*d, d) if lr else p2 + (-2*d, d)
        p1 *= im
        p2 *= im
        p3 *= im
        ap = "\nq\n%s%f %f m\n" % (opacity, p1.x, p1.y)
        ap += "%f %f l\n" % (p2.x, p2.y)
        ap += "%f %f l\n" % (p3.x, p3.y)
        ap += _format_g(w) + " w\n"
        ap += scol + fcol + "b\nQ\n"
        return ap

    @staticmethod
    def _le_ropenarrow(annot, p1, p2, lr, fill_color):
        """Make stream commands for right open arrow line end symbol. "lr" denotes left (False) or right point.
        """
        m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
        shift = 2.5
        d = shift * max(1, w)
        p2 = R - (d/3., 0) if lr else L + (d/3., 0)
        p1 = p2 + (2*d, -d) if lr else p2 + (-2*d, -d)
        p3 = p2 + (2*d, d) if lr else p2 + (-2*d, d)
        p1 *= im
        p2 *= im
        p3 *= im
        ap = "\nq\n%s%f %f m\n" % (opacity, p1.x, p1.y)
        ap += "%f %f l\n" % (p2.x, p2.y)
        ap += "%f %f l\n" % (p3.x, p3.y)
        ap += _format_g(w) + " w\n"
        ap += scol + fcol + "S\nQ\n"
        return ap

    @staticmethod
    def _le_slash(annot, p1, p2, lr, fill_color):
        """Make stream commands for slash line end symbol. "lr" denotes left (False) or right point.
        """
        m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
        rw = 1.1547 * max(1, w) * 1.0         # makes rect diagonal a 30 deg inclination
        M = R if lr else L
        r = Rect(M.x - rw, M.y - 2 * w, M.x + rw, M.y + 2 * w)
        top = r.tl * im
        bot = r.br * im
        ap = "\nq\n%s%f %f m\n" % (opacity, top.x, top.y)
        ap += "%f %f l\n" % (bot.x, bot.y)
        ap += _format_g(w) + " w\n"
        ap += scol + "s\nQ\n"
        return ap

    @staticmethod
    def _le_square(annot, p1, p2, lr, fill_color):
        """Make stream commands for square line end symbol. "lr" denotes left (False) or right point.
        """
        m, im, L, R, w, scol, fcol, opacity = TOOLS._le_annot_parms(annot, p1, p2, fill_color)
        shift = 2.5             # 2*shift*width = length of square edge
        d = shift * max(1, w)
        M = R - (d/2., 0) if lr else L + (d/2., 0)
        r = Rect(M, M) + (-d, -d, d, d)         # the square
        # the square makes line longer by (2*shift - 1)*width
        p = r.tl * im
        ap = "q\n%s%f %f m\n" % (opacity, p.x, p.y)
        p = r.tr * im
        ap += "%f %f l\n"   % (p.x, p.y)
        p = r.br * im
        ap += "%f %f l\n"   % (p.x, p.y)
        p = r.bl * im
        ap += "%f %f l\n"   % (p.x, p.y)
        ap += _format_g(w) + " w\n"
        ap += scol + fcol + "b\nQ\n"
        return ap

    @staticmethod
    def _oval_string(p1, p2, p3, p4):
        """Return /AP string defining an oval within a 4-polygon provided as points
        """
        def bezier(p, q, r):
            f = "%f %f %f %f %f %f c\n"
            return f % (p.x, p.y, q.x, q.y, r.x, r.y)

        kappa = 0.55228474983              # magic number
        ml = p1 + (p4 - p1) * 0.5          # middle points ...
        mo = p1 + (p2 - p1) * 0.5          # for each ...
        mr = p2 + (p3 - p2) * 0.5          # polygon ...
        mu = p4 + (p3 - p4) * 0.5          # side
        ol1 = ml + (p1 - ml) * kappa       # the 8 bezier
        ol2 = mo + (p1 - mo) * kappa       # helper points
        or1 = mo + (p2 - mo) * kappa
        or2 = mr + (p2 - mr) * kappa
        ur1 = mr + (p3 - mr) * kappa
        ur2 = mu + (p3 - mu) * kappa
        ul1 = mu + (p4 - mu) * kappa
        ul2 = ml + (p4 - ml) * kappa
        # now draw, starting from middle point of left side
        ap = "%f %f m\n" % (ml.x, ml.y)
        ap += bezier(ol1, ol2, mo)
        ap += bezier(or1, or2, mr)
        ap += bezier(ur1, ur2, mu)
        ap += bezier(ul1, ul2, ml)
        return ap

    @staticmethod
    def _parse_da(annot):
        '''
        Parse the default appearance (/DA) string of <annot>.

        The /DA is taken from the annotation (inheritable) or, if missing,
        from the document's /Root/AcroForm/DA.

        Returns:
            Tuple (color, fontname, fontsize). If no /DA string could be
            obtained, `((0,), "", 0)` is returned. Defaults for items not
            present in the string are color (0, 0, 0), font "Helv" and
            size 12.
        '''
        if g_use_extra:
            val = extra.Tools_parse_da(annot.this)
        else:
            def Tools__parse_da(annot):
                this_annot = annot.this
                assert isinstance(this_annot, mupdf.PdfAnnot)
                this_annot_obj = mupdf.pdf_annot_obj(this_annot)
                pdf = mupdf.pdf_get_bound_document(this_annot_obj)
                try:
                    da = mupdf.pdf_dict_get_inheritable(this_annot_obj, PDF_NAME('DA'))
                    if not da.m_internal:
                        trailer = mupdf.pdf_trailer(pdf)
                        da = mupdf.pdf_dict_getl(trailer,
                                PDF_NAME('Root'),
                                PDF_NAME('AcroForm'),
                                PDF_NAME('DA'),
                                )
                    da_str = mupdf.pdf_to_text_string(da)
                except Exception:
                    # Best effort: a failure is treated like a missing /DA.
                    if g_exceptions_verbose:    exception_info()
                    return
                return da_str
            val = Tools__parse_da(annot)

        if not val:
            return ((0,), "", 0)
        font = "Helv"
        fsize = 12
        col = (0, 0, 0)
        dat = val.split()  # split on any whitespace
        # Each operator is preceded by its operands; consumed tokens are
        # blanked out.
        for i, item in enumerate(dat):
            if item == "Tf":
                font = dat[i - 2][1:]
                fsize = float(dat[i - 1])
                dat[i] = dat[i-1] = dat[i-2] = ""
                continue
            if item == "g":            # unicolor text
                col = [(float(dat[i - 1]))]
                dat[i] = dat[i-1] = ""
                continue
            if item == "rg":           # RGB colored text
                col = [float(f) for f in dat[i - 3:i]]
                dat[i] = dat[i-1] = dat[i-2] = dat[i-3] = ""
                continue
            if item == "k":           # CMYK colored text
                col = [float(f) for f in dat[i - 4:i]]
                dat[i] = dat[i-1] = dat[i-2] = dat[i-3] = dat[i-4] = ""
                continue

        val = (col, font, fsize)
        return val

    @staticmethod
    def _reset_widget(annot):
        '''
        Reset the form field behind <annot> via `mupdf.pdf_field_reset()`.
        '''
        this_annot = annot
        this_annot_obj = mupdf.pdf_annot_obj(this_annot)
        pdf = mupdf.pdf_get_bound_document(this_annot_obj)
        mupdf.pdf_field_reset(pdf, this_annot_obj)

    @staticmethod
    def _rotate_matrix(page):
        '''
        Return the rotation matrix of <page> in Python form; the default
        `mupdf.FzMatrix()` is used if <page> has no underlying PDF page.
        '''
        pdfpage = page._pdf_page(required=False)
        if not pdfpage.m_internal:
            return JM_py_from_matrix(mupdf.FzMatrix())
        return JM_py_from_matrix(JM_rotate_page_matrix(pdfpage))

    @staticmethod
    def _save_widget(annot, widget):
        '''
        Store the properties of <widget> in <annot>.
        '''
        JM_set_widget_properties(annot, widget)

    @staticmethod
    def _update_da(annot, da_str):
        '''
        Set the /DA string of <annot> to <da_str> and delete its /DS and /RC
        entries. Failures are swallowed (and reported only when verbose
        exceptions are enabled). Returns None.
        '''
        if g_use_extra:
            extra.Tools_update_da(annot.this, da_str)
        else:
            try:
                this_annot = annot.this
                assert isinstance(this_annot, mupdf.PdfAnnot)
                mupdf.pdf_dict_put_text_string(mupdf.pdf_annot_obj(this_annot), PDF_NAME('DA'), da_str)
                mupdf.pdf_dict_del(mupdf.pdf_annot_obj(this_annot), PDF_NAME('DS'))    # /* not supported */
                mupdf.pdf_dict_del(mupdf.pdf_annot_obj(this_annot), PDF_NAME('RC'))    # /* not supported */
            except Exception:
                if g_exceptions_verbose:    exception_info()
                return
            return

    @staticmethod
    def gen_id():
        '''
        Increment the global counter `TOOLS_JM_UNIQUE_ID` and return its new
        value.
        '''
        global TOOLS_JM_UNIQUE_ID
        TOOLS_JM_UNIQUE_ID += 1
        return TOOLS_JM_UNIQUE_ID

    @staticmethod
    def glyph_cache_empty():
        '''
        Empty the glyph cache.
        '''
        mupdf.fz_purge_glyph_cache()

    @staticmethod
    def image_profile(stream, keep_image=0):
        '''
        Metadata of an image binary stream.
        '''
        return JM_image_profile(stream, keep_image)

    @staticmethod
    def mupdf_display_errors(on=None):
        '''
        Set MuPDF error display to True or False.

        With `on=None` nothing is changed. Returns the current setting.
        '''
        global JM_mupdf_show_errors
        if on is not None:
            JM_mupdf_show_errors = bool(on)
        return JM_mupdf_show_errors

    @staticmethod
    def mupdf_display_warnings(on=None):
        '''
        Set MuPDF warnings display to True or False.

        With `on=None` nothing is changed. Returns the current setting.
        '''
        global JM_mupdf_show_warnings
        if on is not None:
            JM_mupdf_show_warnings = bool(on)
        return JM_mupdf_show_warnings

    @staticmethod
    def mupdf_version():
        '''Get version of MuPDF binary build.'''
        return mupdf.FZ_VERSION

    @staticmethod
    def mupdf_warnings(reset=1):
        '''
        Get the MuPDF warnings/errors with optional reset (default).

        Returns the stored messages joined by newlines.
        '''
        # Get any trailing `... repeated <N> times...` message.
        mupdf.fz_flush_warnings()
        ret = '\n'.join(JM_mupdf_warnings_store)
        if reset:
            TOOLS.reset_mupdf_warnings()
        return ret

    @staticmethod
    def reset_mupdf_warnings():
        '''
        Replace the global store of MuPDF warnings by a new empty list.
        '''
        global JM_mupdf_warnings_store
        JM_mupdf_warnings_store = list()

    @staticmethod
    def set_aa_level(level):
        '''
        Set anti-aliasing level.
        '''
        mupdf.fz_set_aa_level(level)

    @staticmethod
    def set_annot_stem(stem=None):
        '''
        Get or set the global annotation id stem.

        With `stem=None` the current stem is returned unchanged. Otherwise
        the stem is set to the first 50 characters of <stem> and the new
        value is returned.
        '''
        global JM_annot_id_stem
        if stem is None:
            return JM_annot_id_stem
        JM_annot_id_stem = stem[:50]
        return JM_annot_id_stem

    @staticmethod
    def set_font_width(doc, xref, width):
        '''
        Set entry "/W [0 65535 width]" in each descendant font of the font
        object at <xref>.

        Returns False if <doc> is not a PDF, else True (also when the font
        has no /DescendantFonts array).
        '''
        pdf = _as_pdf_document(doc, required=0)
        if not pdf.m_internal:
            return False
        font = mupdf.pdf_load_object(pdf, xref)
        dfonts = mupdf.pdf_dict_get(font, PDF_NAME('DescendantFonts'))
        if mupdf.pdf_is_array(dfonts):
            n = mupdf.pdf_array_len(dfonts)
            for i in range(n):
                dfont = mupdf.pdf_array_get(dfonts, i)
                warray = mupdf.pdf_new_array(pdf, 3)
                mupdf.pdf_array_push(warray, mupdf.pdf_new_int(0))
                mupdf.pdf_array_push(warray, mupdf.pdf_new_int(65535))
                mupdf.pdf_array_push(warray, mupdf.pdf_new_int(width))
                mupdf.pdf_dict_put(dfont, PDF_NAME('W'), warray)
        return True

    @staticmethod
    def set_graphics_min_line_width(min_line_width):
        '''
        Set the graphics minimum line width.
        '''
        mupdf.fz_set_graphics_min_line_width(min_line_width)

    @staticmethod
    def set_icc(on=0):
        """Set ICC color handling on or off.

        Switching on raises (via RAISEPY, with PyExc_ValueError) if MuPDF
        was built without ICC support.
        """
        if on:
            if mupdf.FZ_ENABLE_ICC:
                mupdf.fz_enable_icc()
            else:
                RAISEPY("MuPDF built w/o ICC support", PyExc_ValueError)
        elif mupdf.FZ_ENABLE_ICC:
            mupdf.fz_disable_icc()

    @staticmethod
    def set_low_memory(on=None):
        """Set / unset MuPDF device caching.

        With `on=None` nothing is changed. Returns the current value of
        `_globals.no_device_caching`.
        """
        if on is not None:
            _globals.no_device_caching = bool(on)
        return _globals.no_device_caching

    @staticmethod
    def set_small_glyph_heights(on=None):
        """Set / unset small glyph heights.

        With `on=None` nothing is changed. Returns the current setting.
        """
        if on is not None:
            _globals.small_glyph_heights = bool(on)
            if g_use_extra:
                # Keep the C++ side in sync.
                extra.set_small_glyph_heights(_globals.small_glyph_heights)
        return _globals.small_glyph_heights

    @staticmethod
    def set_subset_fontnames(on=None):
        '''
        Set / unset returning fontnames with their subset prefix.

        With `on=None` nothing is changed. Returns the current setting.
        '''
        if on is not None:
            _globals.subset_fontnames = bool(on)
            if g_use_extra:
                # Keep the C++ side in sync.
                extra.set_subset_fontnames(_globals.subset_fontnames)
        return _globals.subset_fontnames

    @staticmethod
    def show_aa_level():
        '''
        Show anti-aliasing values.

        Returns a dict with keys `graphics`, `text` and
        `graphics_min_line_width`.
        '''
        return dict(
                graphics = mupdf.fz_graphics_aa_level(),
                text = mupdf.fz_text_aa_level(),
                graphics_min_line_width = mupdf.fz_graphics_min_line_width(),
                )

    @staticmethod
    def store_maxsize():
        '''
        MuPDF store size limit. Not implemented yet: always returns None.
        '''
        # fixme: return gctx->store->max.
        return None

    @staticmethod
    def store_shrink(percent):
        '''
        Free 'percent' of current store size.

        Returns 0 if the store was emptied completely (percent >= 100),
        otherwise None because the resulting size is not available yet.
        '''
        if percent >= 100:
            mupdf.fz_empty_store()
            return 0
        if percent > 0:
            mupdf.fz_shrink_store(100 - percent)
        # fixme: return gctx->store->size.

    @staticmethod
    def store_size():
        '''
        MuPDF current store size. Not implemented yet: always returns None.
        '''
        # fixme: return gctx->store->size.
        return None

    @staticmethod
    def unset_quad_corrections(on=None):
        '''
        Set ascender / descender corrections on or off.

        With `on=None` nothing is changed. Returns the current value of
        `_globals.skip_quad_corrections`.
        '''
        if on is not None:
            _globals.skip_quad_corrections = bool(on)
            if g_use_extra:
                # Keep the C++ side in sync.
                extra.set_skip_quad_corrections(_globals.skip_quad_corrections)
        return _globals.skip_quad_corrections

    # fixme: also defined at top-level.
    JM_annot_id_stem = 'fitz'

    # Configuration values as returned by JM_fitz_config().
    fitz_config = JM_fitz_config()
# Route MuPDF errors and warnings to our Python handlers. Callbacks are not
# yet supported with cppyy, so this is skipped in that case.
if not mupdf_cppyy:
    mupdf.fz_set_error_callback(JM_mupdf_error)
    mupdf.fz_set_warning_callback(JM_mupdf_warning)
+# If there are pending warnings when we exit, we end up in this sequence:
+#
+#   atexit()
+#   -> mupdf::internal_thread_state::~internal_thread_state()
+#   -> fz_drop_context()
+#   -> fz_flush_warnings()
+#   -> SWIG Director code
+#   -> Python calling JM_mupdf_warning().
+#
+# Unfortunately this causes a SEGV, seemingly because the SWIG Director code has
+# already been torn down.
+#
+# So we use a Python atexit handler to explicitly call fz_flush_warnings();
+# this appears to happen early enough for the Director machinery to still
+# work. So in the sequence above, fz_flush_warnings() will find that there are
+# no pending warnings and will not attempt to call JM_mupdf_warning().
+#
@atexit.register
def _atexit():
    '''
    Exit handler: flush pending MuPDF warnings while the Python callbacks
    still work, then remove the warning and error callbacks.
    '''
    # The flush must come first so that pending warnings are still delivered
    # to the callback.
    mupdf.fz_flush_warnings()
    for install_callback in (mupdf.fz_set_warning_callback, mupdf.fz_set_error_callback):
        install_callback(None)
+# List of (name, red, green, blue) where:
+#   name: upper-case name.
+#   red, green, blue: integer in range 0..255.
+#
+from . import _wxcolors
+_wxcolors = _wxcolors._wxcolors
# Color lookup table derived from `_wxcolors`:
#   key: color name in lower case.
#   value: (red, green, blue), each a float in range 0..1.
#
pdfcolor = {}
for name, r, g, b in _wxcolors:
    pdfcolor[name.lower()] = tuple(component / 255 for component in (r, g, b))
def colors_pdf_dict():
    '''
    Return the module-level `pdfcolor` dictionary (the same object on every
    call, not a copy). It maps:
        name: lower-case color name.
    to
        (red, green, blue): floats in range 0..1.
    '''
    color_table = pdfcolor
    return color_table
def colors_wx_list():
    '''
    Return the module-level `_wxcolors` list (the same object on every call,
    not a copy). Its items are (name, red, green, blue) tuples:
        name: upper-case color name.
        red, green, blue: integers in range 0..255.
    '''
    color_list = _wxcolors
    return color_list
# `utils` imports this module itself and uses some pymupdf.* types in its
# function typing, so it can only be imported once those types exist.
#
from . import utils

# Bind utils.*() functions as module-level names and as class methods.
# The statement order below is kept exactly as it was.
#

# Module-level re-exports.
recover_bbox_quad = utils.recover_bbox_quad
recover_char_quad = utils.recover_char_quad
recover_line_quad = utils.recover_line_quad
recover_quad = utils.recover_quad
recover_span_quad = utils.recover_span_quad

# Annot methods.
Annot.get_text = utils.get_text
Annot.get_textbox = utils.get_textbox

# Document methods.
Document._do_links = utils.do_links
Document._do_widgets = utils.do_widgets
Document.del_toc_item = utils.del_toc_item
Document.get_char_widths = utils.get_char_widths
Document.get_oc = utils.get_oc
Document.get_ocmd = utils.get_ocmd
Document.get_page_labels = utils.get_page_labels
Document.get_page_numbers = utils.get_page_numbers
Document.get_page_pixmap = utils.get_page_pixmap
Document.get_page_text = utils.get_page_text
Document.get_toc = utils.get_toc
Document.has_annots = utils.has_annots
Document.has_links = utils.has_links
Document.insert_page = utils.insert_page
Document.new_page = utils.new_page
Document.scrub = utils.scrub
Document.search_page_for = utils.search_page_for
Document.set_metadata = utils.set_metadata
Document.set_oc = utils.set_oc
Document.set_ocmd = utils.set_ocmd
Document.set_page_labels = utils.set_page_labels
Document.set_toc = utils.set_toc
Document.set_toc_item = utils.set_toc_item
Document.subset_fonts = utils.subset_fonts
# `tobytes` is a second name for the existing Document.write method.
Document.tobytes = Document.write
Document.xref_copy = utils.xref_copy

# IRect methods.
IRect.get_area = utils.get_area

# Page methods.
Page.apply_redactions = utils.apply_redactions
Page.delete_image = utils.delete_image
Page.delete_widget = utils.delete_widget
Page.draw_bezier = utils.draw_bezier
Page.draw_circle = utils.draw_circle
Page.draw_curve = utils.draw_curve
Page.draw_line = utils.draw_line
Page.draw_oval = utils.draw_oval
Page.draw_polyline = utils.draw_polyline
Page.draw_quad = utils.draw_quad
Page.draw_rect = utils.draw_rect
Page.draw_sector = utils.draw_sector
Page.draw_squiggle = utils.draw_squiggle
Page.draw_zigzag = utils.draw_zigzag
Page.get_image_info = utils.get_image_info
Page.get_image_rects = utils.get_image_rects
Page.get_label = utils.get_label
Page.get_links = utils.get_links
Page.get_pixmap = utils.get_pixmap
Page.get_text = utils.get_text
Page.get_text_blocks = utils.get_text_blocks
Page.get_text_selection = utils.get_text_selection
Page.get_text_words = utils.get_text_words
Page.get_textbox = utils.get_textbox
Page.get_textpage_ocr = utils.get_textpage_ocr
Page.insert_image = utils.insert_image
Page.insert_link = utils.insert_link
Page.insert_text = utils.insert_text
Page.insert_textbox = utils.insert_textbox
Page.insert_htmlbox = utils.insert_htmlbox
# Called as a method, so `x` receives the Page instance.
Page.new_shape = lambda x: utils.Shape(x)
Page.replace_image = utils.replace_image
Page.search_for = utils.search_for
Page.show_pdf_page = utils.show_pdf_page
Page.update_link = utils.update_link
Page.write_text = utils.write_text

# Module-level name for the Shape class.
Shape = utils.Shape

# NOTE(review): `.table` is imported only at this point, after the bindings
# above - presumably it relies on some of them; confirm before reordering.
from .table import find_tables
Page.find_tables = find_tables

# Rect and TextWriter methods.
Rect.get_area = utils.get_area
TextWriter.fill_textbox = utils.fill_textbox
class FitzDeprecation(DeprecationWarning):
    '''
    Warning category raised when a deprecated legacy alias name is used.
    '''
def restore_aliases():
    '''
    Creates the legacy names as deprecated aliases of the current names.

    Side effects:
        * Adds a "once" warnings filter for category `FitzDeprecation`.
        * Replaces `warnings.showwarning` with a function that sends all
          warnings to `log()`.
        * Adds legacy-named attributes to the classes listed below, to
          `utils.Shape` and to this module. Callable aliases emit a
          `FitzDeprecation` warning and then forward to the current item.

    An assertion fails if a legacy name already exists with a truthy value,
    so this function is not expected to be called more than once.
    '''
    warnings.filterwarnings("once", category=FitzDeprecation)

    def showthis(msg, cat, filename, lineno, file=None, line=None):
        '''
        Replacement for `warnings.showwarning()` that writes to `log()`.

        Warnings whose formatted text mentions "FitzDeprecation" are reduced
        to their first line, starting at "Deprecation" (the file/line prefix
        and the leading "Fitz" are dropped). All other warnings are logged
        unchanged. `file` is accepted for signature compatibility but is
        ignored.
        '''
        text = warnings.formatwarning(msg, cat, filename, lineno, line=line)
        start = text.find("FitzDeprecation")
        if start < 0:
            log(text)
            return
        # [4:] strips the four characters of "Fitz".
        log(text[start:].splitlines()[0][4:])

    warnings.showwarning = showthis

    def _alias(class_, new_name, legacy_name=None):
        '''
        Adds an alias for a class or module item called <class_>.<new_name>.

        class_:
            Class/module to modify; use None for the current module.
        new_name:
            String name of existing item, e.g. name of method.
        legacy_name:
            Name of legacy object to create in <class_>. If None, we generate
            from <new_name> by removing underscores and capitalising the next
            letter.
        '''
        if class_ is None:
            class_ = sys.modules[__name__]
        if not legacy_name:
            # Drop every underscore and upper-case the character after it;
            # e.g. 'get_page_text' -> 'getPageText'.
            first, *rest = new_name.split('_')
            legacy_name = first + ''.join(p[:1].upper() + p[1:] for p in rest)
        new_object = getattr(class_, new_name)
        assert not getattr(class_, legacy_name, None), f'class {class_} already has {legacy_name}'
        if callable(new_object):
            def deprecated_function(*args, **kwargs):
                # Plain {legacy_name}: the former {legacy_name=} debug
                # specifier rendered as "legacy_name='...'" in the message.
                warnings.warn(
                        f'"{legacy_name}" removed from {class_} after v1.19.0 - use "{new_name}".',
                        category=FitzDeprecation,
                        )
                return new_object(*args, **kwargs)
            # `or ""` avoids a literal "None" line when the target has no
            # docstring.
            deprecated_function.__doc__ = (
                    f'*** Deprecated and removed in version after v1.19.0 - use "{new_name}". ***\n'
                    f'{new_object.__doc__ or ""}'
                    )
            setattr(class_, legacy_name, deprecated_function)
        else:
            # Non-callable items (e.g. properties) are aliased directly, with
            # no deprecation warning.
            setattr(class_, legacy_name, new_object)

    _alias(Annot, 'get_file', 'fileGet')
    _alias(Annot, 'get_pixmap')
    _alias(Annot, 'get_sound', 'soundGet')
    _alias(Annot, 'get_text')
    _alias(Annot, 'get_textbox')
    _alias(Annot, 'get_textpage', 'getTextPage')
    _alias(Annot, 'line_ends')
    _alias(Annot, 'set_blendmode', 'setBlendMode')
    _alias(Annot, 'set_border')
    _alias(Annot, 'set_colors')
    _alias(Annot, 'set_flags')
    _alias(Annot, 'set_info')
    _alias(Annot, 'set_line_ends')
    _alias(Annot, 'set_name')
    _alias(Annot, 'set_oc', 'setOC')
    _alias(Annot, 'set_opacity')
    _alias(Annot, 'set_rect')
    _alias(Annot, 'update_file', 'fileUpd')

    _alias(DisplayList, 'get_pixmap')
    _alias(DisplayList, 'get_textpage', 'getTextPage')

    _alias(Document, 'chapter_count')
    _alias(Document, 'chapter_page_count')
    _alias(Document, 'convert_to_pdf', 'convertToPDF')
    _alias(Document, 'copy_page')
    _alias(Document, 'delete_page')
    _alias(Document, 'delete_pages', 'deletePageRange')
    _alias(Document, 'embfile_add', 'embeddedFileAdd')
    _alias(Document, 'embfile_count', 'embeddedFileCount')
    _alias(Document, 'embfile_del', 'embeddedFileDel')
    _alias(Document, 'embfile_get', 'embeddedFileGet')
    _alias(Document, 'embfile_info', 'embeddedFileInfo')
    _alias(Document, 'embfile_names', 'embeddedFileNames')
    _alias(Document, 'embfile_upd', 'embeddedFileUpd')
    _alias(Document, 'extract_font')
    _alias(Document, 'extract_image')
    _alias(Document, 'find_bookmark')
    _alias(Document, 'fullcopy_page')
    _alias(Document, 'get_char_widths')
    _alias(Document, 'get_ocgs', 'getOCGs')
    _alias(Document, 'get_page_fonts', 'getPageFontList')
    _alias(Document, 'get_page_images', 'getPageImageList')
    _alias(Document, 'get_page_pixmap')
    _alias(Document, 'get_page_text')
    _alias(Document, 'get_page_xobjects', 'getPageXObjectList')
    _alias(Document, 'get_sigflags', 'getSigFlags')
    _alias(Document, 'get_toc', 'getToC')
    _alias(Document, 'get_xml_metadata')
    _alias(Document, 'insert_page')
    _alias(Document, 'insert_pdf', 'insertPDF')
    _alias(Document, 'is_dirty')
    _alias(Document, 'is_form_pdf', 'isFormPDF')
    _alias(Document, 'is_pdf', 'isPDF')
    _alias(Document, 'is_reflowable')
    _alias(Document, 'is_repaired')
    _alias(Document, 'last_location')
    _alias(Document, 'load_page')
    _alias(Document, 'make_bookmark')
    _alias(Document, 'move_page')
    _alias(Document, 'needs_pass')
    _alias(Document, 'new_page')
    _alias(Document, 'next_location')
    _alias(Document, 'page_count')
    _alias(Document, 'page_cropbox', 'pageCropBox')
    _alias(Document, 'page_xref')
    _alias(Document, 'pdf_catalog', 'PDFCatalog')
    _alias(Document, 'pdf_trailer', 'PDFTrailer')
    _alias(Document, 'prev_location', 'previousLocation')
    _alias(Document, 'resolve_link')
    _alias(Document, 'search_page_for')
    _alias(Document, 'set_language')
    _alias(Document, 'set_metadata')
    _alias(Document, 'set_toc', 'setToC')
    _alias(Document, 'set_xml_metadata')
    _alias(Document, 'update_object')
    _alias(Document, 'update_stream')
    _alias(Document, 'xref_is_stream', 'isStream')
    _alias(Document, 'xref_length')
    _alias(Document, 'xref_object')
    _alias(Document, 'xref_stream')
    _alias(Document, 'xref_stream_raw')
    _alias(Document, 'xref_xml_metadata', 'metadataXML')

    _alias(IRect, 'get_area')
    _alias(IRect, 'get_area', 'getRectArea')
    _alias(IRect, 'include_point')
    _alias(IRect, 'include_rect')
    _alias(IRect, 'is_empty')
    _alias(IRect, 'is_infinite')

    _alias(Link, 'is_external')
    _alias(Link, 'set_border')
    _alias(Link, 'set_colors')

    _alias(Matrix, 'is_rectilinear')
    _alias(Matrix, 'prerotate', 'preRotate')
    _alias(Matrix, 'prescale', 'preScale')
    _alias(Matrix, 'preshear', 'preShear')
    _alias(Matrix, 'pretranslate', 'preTranslate')

    # class_=None targets this module itself.
    _alias(None, 'get_pdf_now', 'getPDFnow')
    _alias(None, 'get_pdf_str', 'getPDFstr')
    _alias(None, 'get_text_length')
    _alias(None, 'get_text_length', 'getTextlength')
    _alias(None, 'image_profile', 'ImageProperties')
    _alias(None, 'paper_rect', 'PaperRect')
    _alias(None, 'paper_size', 'PaperSize')
    _alias(None, 'paper_sizes')
    _alias(None, 'planish_line')

    _alias(Outline, 'is_external')
    _alias(Outline, 'is_open')

    _alias(Page, 'add_caret_annot')
    _alias(Page, 'add_circle_annot')
    _alias(Page, 'add_file_annot')
    _alias(Page, 'add_freetext_annot')
    _alias(Page, 'add_highlight_annot')
    _alias(Page, 'add_ink_annot')
    _alias(Page, 'add_line_annot')
    _alias(Page, 'add_polygon_annot')
    _alias(Page, 'add_polyline_annot')
    _alias(Page, 'add_rect_annot')
    _alias(Page, 'add_redact_annot')
    _alias(Page, 'add_squiggly_annot')
    _alias(Page, 'add_stamp_annot')
    _alias(Page, 'add_strikeout_annot')
    _alias(Page, 'add_text_annot')
    _alias(Page, 'add_underline_annot')
    _alias(Page, 'add_widget')
    _alias(Page, 'clean_contents')
    _alias(Page, 'cropbox', 'CropBox')
    _alias(Page, 'cropbox_position', 'CropBoxPosition')
    _alias(Page, 'delete_annot')
    _alias(Page, 'delete_link')
    _alias(Page, 'delete_widget')
    _alias(Page, 'derotation_matrix')
    _alias(Page, 'draw_bezier')
    _alias(Page, 'draw_circle')
    _alias(Page, 'draw_curve')
    _alias(Page, 'draw_line')
    _alias(Page, 'draw_oval')
    _alias(Page, 'draw_polyline')
    _alias(Page, 'draw_quad')
    _alias(Page, 'draw_rect')
    _alias(Page, 'draw_sector')
    _alias(Page, 'draw_squiggle')
    _alias(Page, 'draw_zigzag')
    _alias(Page, 'first_annot')
    _alias(Page, 'first_link')
    _alias(Page, 'first_widget')
    _alias(Page, 'get_contents')
    _alias(Page, 'get_displaylist', 'getDisplayList')
    _alias(Page, 'get_drawings')
    _alias(Page, 'get_fonts', 'getFontList')
    _alias(Page, 'get_image_bbox')
    _alias(Page, 'get_images', 'getImageList')
    _alias(Page, 'get_links')
    _alias(Page, 'get_pixmap')
    _alias(Page, 'get_svg_image', 'getSVGimage')
    _alias(Page, 'get_text')
    _alias(Page, 'get_text_blocks')
    _alias(Page, 'get_text_words')
    _alias(Page, 'get_textbox')
    _alias(Page, 'get_textpage', 'getTextPage')
    _alias(Page, 'insert_font')
    _alias(Page, 'insert_image')
    _alias(Page, 'insert_link')
    _alias(Page, 'insert_text')
    _alias(Page, 'insert_textbox')
    _alias(Page, 'is_wrapped', '_isWrapped')
    _alias(Page, 'load_annot')
    _alias(Page, 'load_links')
    _alias(Page, 'mediabox', 'MediaBox')
    _alias(Page, 'mediabox_size', 'MediaBoxSize')
    _alias(Page, 'new_shape')
    _alias(Page, 'read_contents')
    _alias(Page, 'rotation_matrix')
    _alias(Page, 'search_for')
    _alias(Page, 'set_cropbox', 'setCropBox')
    _alias(Page, 'set_mediabox', 'setMediaBox')
    _alias(Page, 'set_rotation')
    _alias(Page, 'show_pdf_page', 'showPDFpage')
    _alias(Page, 'transformation_matrix')
    _alias(Page, 'update_link')
    _alias(Page, 'wrap_contents')
    _alias(Page, 'write_text')

    _alias(Pixmap, 'clear_with')
    _alias(Pixmap, 'copy', 'copyPixmap')
    _alias(Pixmap, 'gamma_with')
    _alias(Pixmap, 'invert_irect', 'invertIRect')
    _alias(Pixmap, 'pil_save', 'pillowWrite')
    _alias(Pixmap, 'pil_tobytes', 'pillowData')
    _alias(Pixmap, 'save', 'writeImage')
    _alias(Pixmap, 'save', 'writePNG')
    _alias(Pixmap, 'set_alpha')
    _alias(Pixmap, 'set_dpi', 'setResolution')
    _alias(Pixmap, 'set_origin')
    _alias(Pixmap, 'set_pixel')
    _alias(Pixmap, 'set_rect')
    _alias(Pixmap, 'tint_with')
    _alias(Pixmap, 'tobytes', 'getImageData')
    _alias(Pixmap, 'tobytes', 'getPNGData')
    _alias(Pixmap, 'tobytes', 'getPNGdata')

    _alias(Quad, 'is_convex')
    _alias(Quad, 'is_empty')
    _alias(Quad, 'is_rectangular')

    _alias(Rect, 'get_area')
    _alias(Rect, 'get_area', 'getRectArea')
    _alias(Rect, 'include_point')
    _alias(Rect, 'include_rect')
    _alias(Rect, 'is_empty')
    _alias(Rect, 'is_infinite')

    _alias(TextWriter, 'fill_textbox')
    _alias(TextWriter, 'write_text')

    _alias(utils.Shape, 'draw_bezier')
    _alias(utils.Shape, 'draw_circle')
    _alias(utils.Shape, 'draw_curve')
    _alias(utils.Shape, 'draw_line')
    _alias(utils.Shape, 'draw_oval')
    _alias(utils.Shape, 'draw_polyline')
    _alias(utils.Shape, 'draw_quad')
    _alias(utils.Shape, 'draw_rect')
    _alias(utils.Shape, 'draw_sector')
    _alias(utils.Shape, 'draw_squiggle')
    _alias(utils.Shape, 'draw_zigzag')
    _alias(utils.Shape, 'insert_text')
    _alias(utils.Shape, 'insert_textbox')
# The legacy aliases are not installed at import time; this guard is
# permanently false, so restore_aliases() only runs if called explicitly.
if False:
    restore_aliases()

# Public version string of the bindings.
__version__ = VersionBind

# Module docstring: PyMuPDF and MuPDF versions, then the running Python
# version, platform and pointer width (64 if sys.maxsize > 2**32, else 32).
__doc__ = (
    f'PyMuPDF {VersionBind}: Python bindings for the MuPDF {VersionFitz} library (rebased implementation).\n'
    f'Python {sys.version_info[0]}.{sys.version_info[1]} running on {sys.platform} ({64 if sys.maxsize > 2**32 else 32}-bit).\n'
)

Mercurial > hgrepos > Python2 > PyMuPDF

comparison src/__init__.py @ 1:1d09e1dec1d9 upstream

comparison src/__init__.py @ 1:1d09e1dec1d9 upstream