diff mupdf-source/setup.py @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mupdf-source/setup.py	Mon Sep 15 11:43:07 2025 +0200
@@ -0,0 +1,561 @@
+#!/usr/bin/env python3
+
+'''
+Installation script for MuPDF Python bindings, using scripts/pipcl.py.
+
+Notes:
+
+    When building an sdist (e.g. with 'pip sdist'), we use clang-python to
+    generate C++ source which is then included in the sdist.
+
+    This allows wheels to be built from an sdist without requiring clang-python
+    to be installed.
+
+
+Internal testing only - environment variables:
+
+    MUPDF_SETUP_BUILD_DIR
+        Overrides the default build directory.
+
+    MUPDF_SETUP_USE_CLANG_PYTHON
+        Affects whether we use clang-python when building.
+
+        If set, must be '0' or '1', and we override the default and do not
+        ('0') / do ('1') use clang-python to generate C++ source code from
+        MuPDF headers.
+
+        If we are an sdist we default to not re-generating C++ - the generated
+        files will be already available in platform/c++/. Otherwise we default
+        to generating C++ source code.
+
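+    MUPDF_SETUP_USE_SWIG
+        If set, must be '0' or '1', and we do not ('0') / do ('1') attempt to
+        run swig.
+
+    MUPDF_SETUP_VERSION
+        If set, is used as the package version. It must start with the
+        version defined in include/mupdf/fitz/version.h.
+
+    MUPDF_SETUP_DO_BUILD
+        If set to '0', build() does not run the build command. This is a hack
+        for testing.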
+'''
+
+import os
+import platform
+import re
+import subprocess
+import sys
+import time
+
+
+def log(text=''):
+    '''
+    Writes <text> to stdout with each line prefixed by 'mupdf:setup.py: ',
+    then flushes stdout.
+    '''
+    prefix = 'mupdf:setup.py: '
+    for row in text.split('\n'):
+        print(prefix + row)
+    sys.stdout.flush()
+
+def cache(function):
+    '''
+    Simple (and probably unnecessary) caching decorator.
+
+    Results are stored in a dict keyed on the tuple of positional arguments,
+    so arguments must be hashable. Keyword arguments are not supported.
+    '''
+    import functools
+    results = {}
+    @functools.wraps(function)
+    def wrapper(*args):
+        if args not in results:
+            # Forward the arguments. Calling function() without them would
+            # fail, or cache a wrong result, for anything other than a
+            # zero-argument function.
+            results[args] = function(*args)
+        return results[args]
+    return wrapper
+
+@cache
+def root_dir():
+    '''
+    Returns the absolute path of the directory that contains this file.
+    '''
+    this_file = os.path.abspath(__file__)
+    return os.path.dirname(this_file)
+
+@cache
+def windows():
+    '''
+    Returns true if platform.system() is 'Windows' or starts with 'CYGWIN'.
+    '''
+    system = platform.system()
+    if system == 'Windows':
+        return True
+    return system.startswith('CYGWIN')
+
+@cache
+def macos():
+    '''
+    Returns true if platform.system() is 'Darwin'.
+    '''
+    return platform.system() == 'Darwin'
+
+@cache
+def openbsd():
+    '''
+    Returns true if platform.system() is 'OpenBSD'.
+    '''
+    return platform.system() == 'OpenBSD'
+
+@cache
+def msys2():
+    '''
+    Returns true if platform.system() starts with 'MSYS_NT-'.
+    '''
+    system = platform.system()
+    return system.startswith('MSYS_NT-')
+
+@cache
+def build_dir():
+    '''
+    Returns the directory that holds build output.
+
+    $MUPDF_SETUP_BUILD_DIR is returned unchanged if it is set. Otherwise we
+    return a directory under <root_dir()>/build/ whose name includes 'x32' or
+    'x64' and the major.minor version of the Python that is running us.
+    '''
+    override = os.environ.get('MUPDF_SETUP_BUILD_DIR')
+    if override is not None:
+        return override
+    # This is x86/x64-specific.
+    #
+    # We generate 32 or 64-bit binaries to match whatever Python we
+    # are running under.
+    #
+    if sys.maxsize == 2**31 - 1:
+        cpu = 'x32'
+    else:
+        cpu = 'x64'
+    version_parts = platform.python_version().split('.')
+    major_minor = '.'.join(version_parts[:2])
+    return f'{root_dir()}/build/shared-release-{cpu}-py{major_minor}'
+
+@cache
+def in_sdist():
+    '''
+    Returns true if a 'PKG-INFO' file exists in root_dir(), which we take to
+    mean that we are running from an sdist.
+    '''
+    pkg_info = f'{root_dir()}/PKG-INFO'
+    return os.path.exists(pkg_info)
+
+sys.path.append(f'{root_dir()}/scripts')
+import pipcl
+
+
+@cache
+def mupdf_version():
+    '''
+    Returns version string.
+
+    If $MUPDF_SETUP_VERSION is set we use it directly, asserting that it starts
+    with the version string defined in include/mupdf/fitz/version.h.
+
+    Otherwise if we are in an sdist ('PKG-INFO' exists) we use its
+    version. We assert that this starts with the base version in
+    include/mupdf/fitz/version.h.
+
+    Otherwise we generate a version string by appending the current date and
+    time, as a single '.YYYYMMDDHHMM' component, to the base version in
+    include/mupdf/fitz/version.h. For example '1.18.0.202103301800'.
+
+    The work is done by mupdf_version_internal(); this wrapper only adds
+    caching.
+    '''
+    return mupdf_version_internal()
+
+
+def mupdf_version_internal(t_tuple=None):
+    '''
+    Return version number, with doctest check for broken behaviour with leading
+    zeros.
+
+    t_tuple:
+        Optional time tuple, as accepted by time.strftime(), used for the
+        date/time suffix when we are in a source tree. If None we use
+        time.localtime().
+
+    The base version is read from include/mupdf/fitz/version.h in root_dir().
+    $MUPDF_SETUP_VERSION takes priority, then the version in the sdist's
+    PKG-INFO, then the base version plus a date/time suffix. Asserts if the
+    chosen version does not start with the base version.
+
+    >>> t0str = '2024-06-06-00:00'
+    >>> t0tuple = time.strptime(t0str, '%Y-%m-%d-%H:%M')
+    >>> v = mupdf_version_internal(t0tuple)
+    >>> print(v, file=sys.stderr)
+    >>> assert v.endswith('.202406060000')
+    '''
+    version_h = f'{root_dir()}/include/mupdf/fitz/version.h'
+    with open(version_h) as f:
+        text = f.read()
+    m = re.search('\n#define FZ_VERSION "([^"]+)"\n', text)
+    assert m, f'Failed to find FZ_VERSION in: {version_h}'
+    base_version = m.group(1)
+
+    # If MUPDF_SETUP_VERSION exists, use it.
+    #
+    ret = os.environ.get('MUPDF_SETUP_VERSION')
+    if ret:
+        log(f'Using version from $MUPDF_SETUP_VERSION: {ret}')
+        assert ret.startswith(base_version)
+        return ret
+
+    # If we are in an sdist, use the version from the PKG-INFO file.
+    #
+    # in_sdist() looks for PKG-INFO in root_dir(), so we read that same file
+    # rather than whatever 'PKG-INFO' is in the current directory.
+    #
+    if in_sdist():
+        items = pipcl.parse_pkg_info(f'{root_dir()}/PKG-INFO')
+        assert items['Name'] == 'mupdf'
+        ret = items['Version']
+        #log(f'Using version from PKG-INFO: {ret}')
+        assert ret.startswith(base_version)
+        return ret
+
+    # If we get here, we are in a source tree.
+    #
+    # We use the MuPDF version with a unique(ish) suffix based on the current
+    # date and time, so we can make multiple Python releases without requiring
+    # an increment to the MuPDF version.
+    #
+    # This also allows us to easily experiment on test.pypi.org.
+    #
+    # We have to avoid the time component(s) containing `.0` as this is
+    # prohibited by PEP-440.
+    #
+    if t_tuple is None:
+        t_tuple = time.localtime()
+    tt = time.strftime(".%Y%m%d%H%M", t_tuple)
+    tail = tt.replace('.0', '.')
+    ret = base_version + tail
+    #log(f'Have created version number: {ret}')
+    pipcl._assert_version_pep_440(ret)
+    return ret
+
+
+def git_info():
+    '''
+    Returns (current, origin, diff):
+        current: git id from 'git show'.
+        origin: git id from 'git show origin'.
+        diff: diff relative to current.
+
+    All git commands are run with root_dir() as the working directory.
+    Raises subprocess.CalledProcessError if a git command fails.
+    '''
+    def run(command):
+        # Pass the directory via cwd= rather than pasting it into a
+        # `cd ... &&` shell prefix, so that a root directory containing spaces
+        # or shell metacharacters still works.
+        output = subprocess.check_output(command, shell=True, cwd=root_dir())
+        return output.decode('utf8')
+    def get_id(command):
+        # The id is the first word of the first line of output.
+        first_line = run(command).split('\n', 1)[0]
+        return first_line.split(' ', 1)[0]
+    current = get_id('git show --pretty=oneline')
+    origin = get_id('git show --pretty=oneline origin')
+    diff = run('git diff')
+    return current, origin, diff
+
+
+def get_flag(name, default):
+    '''
+    name:
+        Name of environmental variable.
+    default:
+        Value to return if <name> undefined.
+    Returns False if name is '0', True if name is '1', <default> if
+    undefined. Otherwise raises AssertionError.
+
+    The name, default, raw value and result are logged.
+    '''
+    value = os.environ.get(name)
+    if value is None:
+        ret = default
+    elif value == '0':
+        ret = False
+    elif value == '1':
+        ret = True
+    else:
+        # Raise explicitly instead of using `assert 0`: assert statements are
+        # removed under `python -O`, which would leave <ret> unset below.
+        raise AssertionError(f'If set, ${name} must be "0" or "1", but is: {value!r}')
+    log(f'name={name} default={default} value={value} ret={ret}')
+    return ret
+
+
+# pipcl Callbacks.
+#
+
+def sdist():
+    '''
+    pipcl callback. We run './scripts/mupdfwrap.py' to create C++ files etc
+    using clang-python, and return these generated files plus all files known
+    to git. [This allows sdists to be used to generate wheels etc on machines
+    without clang-python.]
+
+    The stages passed to mupdfwrap.py's -b option are '0' if
+    $MUPDF_SETUP_USE_CLANG_PYTHON allows it and '2' if $MUPDF_SETUP_USE_SWIG
+    allows it; both default to enabled.
+
+    Also writes a 'git-info' file into root_dir() and includes it in the
+    returned list.
+
+    Returns a list of paths. Asserts that root_dir() is a git checkout, and
+    raises subprocess.CalledProcessError if a command fails.
+    '''
+    assert os.path.exists(f'{root_dir()}/.git'), f'Cannot make sdist because not a git checkout: {root_dir()}'
+
+    # Create 'git-info' file containing git ids that identify this tree. For
+    # the moment this is a simple text format, but we could possibly use pickle
+    # instead, depending on whether we want to include more information, e.g.
+    # diff relative to origin.
+    #
+    git_id, git_id_origin, git_diff = git_info()
+    with open(f'{root_dir()}/git-info', 'w') as f:
+        f.write(f'git-id: {git_id}\n')
+        f.write(f'git-id-origin: {git_id_origin}\n')
+        f.write(f'git-diff:\n{git_diff}\n')
+
+    paths = pipcl.git_items( root_dir(), submodules=True)
+
+    # Strip out some large test directories. str.startswith() accepts a tuple
+    # of prefixes, so one pass over the list is enough.
+    excluded_prefixes = (
+            'thirdparty/harfbuzz/test/',
+            'thirdparty/tesseract/test/',
+            'thirdparty/extract/test/',
+            )
+    paths = [path for path in paths if not path.startswith(excluded_prefixes)]
+
+    # Build C++ files and SWIG C code for inclusion in sdist, so that it can be
+    # used on systems without clang-python or SWIG.
+    #
+    use_clang_python = get_flag('MUPDF_SETUP_USE_CLANG_PYTHON', True)
+    use_swig = get_flag('MUPDF_SETUP_USE_SWIG', True)
+    b = ''
+    if use_clang_python:
+        b += '0'
+    if use_swig:
+        b += '2'
+    command = '' if os.getcwd() == root_dir() else f'cd {os.path.relpath(root_dir())} && '
+    # Quote the interpreter path, as build() does, so that a Python installed
+    # in a directory containing spaces works. <b> is quoted because it can be
+    # empty.
+    command += f'"{sys.executable}" ./scripts/mupdfwrap.py -d {build_dir()} -b "{b}"'
+    log(f'Running: {command}')
+    subprocess.check_call(command, shell=True)
+    paths += [
+            'build/shared-release/mupdf.py',
+            'git-info',
+            'platform/c++/generated.pickle',
+            'platform/c++/implementation/classes.cpp',
+            'platform/c++/implementation/classes2.cpp',
+            'platform/c++/implementation/exceptions.cpp',
+            'platform/c++/implementation/functions.cpp',
+            'platform/c++/implementation/internal.cpp',
+            'platform/c++/include/mupdf/classes.h',
+            'platform/c++/include/mupdf/classes2.h',
+            'platform/c++/include/mupdf/exceptions.h',
+            'platform/c++/include/mupdf/functions.h',
+            'platform/c++/include/mupdf/internal.h',
+            'platform/c++/windows_mupdf.def',
+            'platform/python/mupdfcpp_swig.i.cpp',
+            ]
+    return paths
+
+
+def build():
+    '''
+    pipcl callback. Runs scripts/mupdfwrap.py to build the MuPDF C, C++ and
+    Python libraries, and returns a list of (path, '') tuples for the files
+    that were created.
+
+    If $MUPDF_SETUP_DO_BUILD is '0' the build command is not run (a hack for
+    testing), but the list of files is still returned.
+    '''
+    # If we are an sdist, default to not trying to run clang-python - the
+    # generated files will already exist, and installing/using clang-python
+    # might be tricky.
+    #
+    use_clang_python = get_flag('MUPDF_SETUP_USE_CLANG_PYTHON', not in_sdist())
+    use_swig = get_flag('MUPDF_SETUP_USE_SWIG', True)
+
+    # Collect the stages for mupdfwrap.py's -b option, in order.
+    stages = []
+    if not windows():
+        stages.append('m')  # Build C library.
+    if use_clang_python:
+        stages.append('0')  # Build C++ source.
+    stages.append('1')      # Build C++ library (also contains C library on Windows).
+    if use_swig:
+        stages.append('2')  # Build SWIG-generated source.
+    stages.append('3')      # Build SWIG library _mupdf.so.
+    b = ''.join(stages)
+
+    # mupdfwrap.py is run from the root directory.
+    if root_dir() == os.getcwd():
+        cd_prefix = ''
+    else:
+        cd_prefix = f'cd {os.path.relpath(root_dir())} && '
+    command = f'{cd_prefix}"{sys.executable}" ./scripts/mupdfwrap.py -d {build_dir()} -b {b}'
+
+    do_build = os.environ.get('MUPDF_SETUP_DO_BUILD')
+    if do_build != '0':
+        log(f'build(): Building MuPDF C, C++ and Python libraries with: {command}')
+        subprocess.check_call(command, shell=True)
+    else:
+        # This is a hack for testing.
+        log(f'Not doing build because $MUPDF_SETUP_DO_BUILD={do_build}')
+
+    # Return generated files to install or copy into wheel.
+    #
+    out = build_dir()
+    if windows():
+        if sys.maxsize == 2**31 - 1:
+            infix = ''
+        else:
+            infix = '64'
+        names = [
+                f'{out}/mupdfcpp{infix}.dll',   # C and C++.
+                f'{out}/_mupdf.pyd',            # Python internals.
+                f'{out}/mupdf.py',              # Python.
+                ]
+    elif macos():
+        log( f'Contents of {out} are:')
+        for leaf in os.listdir(out):
+            log( f'    {leaf}')
+        names = [
+                f'{out}/libmupdf.dylib',    # C.
+                f'{out}/libmupdfcpp.so',    # C++.
+                f'{out}/_mupdf.so',         # Python internals.
+                f'{out}/mupdf.py',          # Python.
+                ]
+    else:
+        names = [
+                pipcl.get_soname(f'{out}/libmupdf.so'),     # C.
+                pipcl.get_soname(f'{out}/libmupdfcpp.so'),  # C++.
+                f'{out}/_mupdf.so',                         # Python internals.
+                f'{out}/mupdf.py',                          # Python.
+                ]
+    # Each returned item is a (path, '') tuple.
+    paths = [(name, '') for name in names]
+
+    log(f'build(): returning: {paths}')
+    return paths
+
+
+def clean(all_):
+    '''
+    pipcl callback. Returns what to remove when cleaning: build_dir() if
+    <all_> is false, otherwise a list containing 'build' and several
+    directories within platform/win32/.
+    '''
+    if not all_:
+        # Ideally we would return selected directories in platform/win32/ if on
+        # Windows, but that would get a little involved.
+        #
+        return build_dir()
+    everything = [
+            'build',
+            'platform/win32/Release',
+            'platform/win32/ReleaseDLL',
+            'platform/win32/Win32',
+            'platform/win32/x64',
+            ]
+    return everything
+
+
+# Setup pipcl.
+#
+# <description> is the long description passed to pipcl.Package() below; it
+# appears to be written in reStructuredText.
+#
+# NOTE(review): the Stext example inside this text calls log(), which the
+# example script itself never defines - confirm whether print() was intended.
+#
+description = """
+Summary
+-------
+
+* Python bindings for the MuPDF PDF library.
+* A python module called ``mupdf``.
+* Generated from the MuPDF C++ API, which is itself generated from the MuPDF C API.
+* Provides Python functions that wrap most ``fz_`` and ``pdf_`` functions.
+* Provides Python classes that wrap most ``fz_`` and ``pdf_`` structs.
+
+  * Class methods provide access to most of the underlying C API functions (except for functions that don't take struct args such as ``fz_strlcpy()``).
+* MuPDF's ``setjmp``/``longjmp`` exceptions are converted to Python exceptions.
+* Functions and methods do not take ``fz_context`` arguments. (Automatically-generated per-thread contexts are used internally.)
+* Wrapper classes automatically handle reference counting of the underlying structs (with internal calls to ``fz_keep_*()`` and ``fz_drop_*()``).
+* Support for MuPDF function pointers with SWIG Director classes, allowing MuPDF to call Python callbacks.
+* Provides a small number of extensions beyond the basic C API:
+
+  * Some generated classes have extra support for iteration.
+  * Some custom class methods and constructors.
+  * Simple 'POD' structs have ``__str__()`` methods, for example ``mupdf.Rect`` is represented like: ``(x0=90.51 y0=160.65 x1=501.39 y1=215.6)``.
+
+Example usage
+-------------
+
+Minimal Python code that uses the ``mupdf`` module:
+
+::
+
+    import mupdf
+    document = mupdf.Document('foo.pdf')
+
+
+A simple example Python test script (run by ``scripts/mupdfwrap.py -t``) is:
+
+* ``scripts/mupdfwrap_test.py``
+
+More detailed usage of the Python API can be found in:
+
+* ``scripts/mutool.py``
+* ``scripts/mutool_draw.py``
+
+Here is some example code that shows all available information about document's Stext blocks, lines and characters:
+
+::
+
+    #!/usr/bin/env python3
+
+    import mupdf
+
+    def show_stext(document):
+        '''
+        Shows all available information about Stext blocks, lines and characters.
+        '''
+        for p in range(document.count_pages()):
+            page = document.load_page(p)
+            stextpage = mupdf.StextPage(page, mupdf.StextOptions())
+            for block in stextpage:
+                block_ = block.m_internal
+                log(f'block: type={block_.type} bbox={block_.bbox}')
+                for line in block:
+                    line_ = line.m_internal
+                    log(f'    line: wmode={line_.wmode}'
+                            + f' dir={line_.dir}'
+                            + f' bbox={line_.bbox}'
+                            )
+                    for char in line:
+                        char_ = char.m_internal
+                        log(f'        char: {chr(char_.c)!r} c={char_.c:4} color={char_.color}'
+                                + f' origin={char_.origin}'
+                                + f' quad={char_.quad}'
+                                + f' size={char_.size:6.2f}'
+                                + f' font=('
+                                    +  f'is_mono={char_.font.flags.is_mono}'
+                                    + f' is_bold={char_.font.flags.is_bold}'
+                                    + f' is_italic={char_.font.flags.is_italic}'
+                                    + f' ft_substitute={char_.font.flags.ft_substitute}'
+                                    + f' ft_stretch={char_.font.flags.ft_stretch}'
+                                    + f' fake_bold={char_.font.flags.fake_bold}'
+                                    + f' fake_italic={char_.font.flags.fake_italic}'
+                                    + f' has_opentype={char_.font.flags.has_opentype}'
+                                    + f' invalid_bbox={char_.font.flags.invalid_bbox}'
+                                    + f' name={char_.font.name}'
+                                    + f')'
+                                )
+
+    document = mupdf.Document('foo.pdf')
+    show_stext(document)
+
+More information
+----------------
+
+https://mupdf.com/r/C-and-Python-APIs
+
+"""
+
+# The full text of the COPYING file is used as the package's license field.
+with open(f'{root_dir()}/COPYING') as f:
+    license = f.read()
+
+# Package metadata, plus the build/clean/sdist callbacks defined above.
+mupdf_package = pipcl.Package(
+        name='mupdf',
+        version=mupdf_version(),
+        root=root_dir(),
+        summary='Python bindings for MuPDF library.',
+        description=description,
+        classifier=[
+            'Development Status :: 4 - Beta',
+            'Intended Audience :: Developers',
+            'License :: OSI Approved :: GNU Affero General Public License v3',
+            'Programming Language :: Python :: 3',
+        ],
+        author='Artifex Software, Inc.',
+        author_email='support@artifex.com',
+        home_page='https://mupdf.com/',
+        # Each item is a single '<label>, <url>' string.
+        project_url=[
+            'Documentation, https://mupdf.com/r/C-and-Python-APIs/',
+            'Source, https://git.ghostscript.com/?p=mupdf.git',
+            'Tracker, https://bugs.ghostscript.com/',
+        ],
+        keywords='PDF',
+        platform=None,
+        license=license,
+        fn_build=build,
+        fn_clean=clean,
+        fn_sdist=sdist,
+        )
+
+
+# Things to allow us to function as a PEP-517 backend:
+#
+def build_wheel( wheel_directory, config_settings=None, metadata_directory=None):
+    '''
+    Build-backend hook. Forwards its arguments to mupdf_package.build_wheel()
+    and returns the result.
+    '''
+    hook = mupdf_package.build_wheel
+    return hook(wheel_directory, config_settings, metadata_directory)
+
+def build_sdist( sdist_directory, config_settings=None):
+    '''
+    Build-backend hook. Forwards its arguments to mupdf_package.build_sdist()
+    and returns the result.
+    '''
+    hook = mupdf_package.build_sdist
+    return hook(sdist_directory, config_settings)
+
+def get_requires_for_build_wheel(config_settings=None):
+    '''
+    Adds to pyproject.toml:[build-system]:requires, allowing programmatic
+    control over what packages we require.
+
+    Returns a list of requirement strings: always 'setuptools', plus libclang
+    and swig except on the platforms noted below.
+    '''
+    requires = ['setuptools']
+
+    # libclang.
+    #
+    # OpenBSD: libclang not available via pip; assuming `pkg_add py3-llvm`.
+    if not openbsd():
+        if macos() and platform.machine() == 'arm64':
+            # MacOS/arm64: forcing use of libclang 16.0.6 because 17.0.6
+            # and 18.1.1 are known to fail with:
+            # `clang.cindex.TranslationUnitLoadError: Error parsing translation unit.`
+            requires.append('libclang==16.0.6')
+        else:
+            requires.append('libclang')
+
+    # swig.
+    #
+    # msys2: pip install of swig does not build; assuming `pacman -S swig`.
+    # OpenBSD: pip install of swig does not build; assuming `pkg_add swig`.
+    if not (msys2() or openbsd()):
+        requires.append('swig')
+    return requires
+
+
+# Allow us to be used as a pre-PEP-517 setup.py script.
+#
+if __name__ == '__main__':
+    # Hand the command-line arguments to the pipcl package object.
+    mupdf_package.handle_argv(sys.argv)