diff mupdf-source/scripts/wrap/state.py @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children b5f06508363a
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mupdf-source/scripts/wrap/state.py	Mon Sep 15 11:43:07 2025 +0200
@@ -0,0 +1,379 @@
+'''
+Misc state.
+'''
+
+import glob
+import os
+import platform
+import re
+import sys
+
+import jlib
+
+from . import parse
+
+try:
+    import clang.cindex
+except Exception as e:
+    jlib.log('Warning: failed to import clang.cindex: {e=}\n'
+            f'We need Clang Python to build MuPDF python.\n'
+            f'Install with `pip install libclang` (typically inside a Python venv),\n'
+            f'or (OpenBSD only) `pkg_add py3-llvm.`\n'
+            )
+    clang = None
+
+omit_fns = [
+        'fz_open_file_w',
+        'fz_colorspace_name_process_colorants', # Not implemented in mupdf.so?
+        'fz_clone_context_internal',            # Not implemented in mupdf?
+        'fz_assert_lock_held',      # Is a macro if NDEBUG defined.
+        'fz_assert_lock_not_held',  # Is a macro if NDEBUG defined.
+        'fz_lock_debug_lock',       # Is a macro if NDEBUG defined.
+        'fz_lock_debug_unlock',     # Is a macro if NDEBUG defined.
+        'fz_argv_from_wargv',       # Only defined on Windows. Breaks our out-param wrapper code.
+
+        # Only defined on Windows, so breaks building Windows wheels from
+        # sdist, because the C++ source in sdist (usually generated on Unix)
+        # does not contain these functions, but SWIG-generated code will try to
+        # call them.
+        'fz_utf8_from_wchar',
+        'fz_wchar_from_utf8',
+        'fz_fopen_utf8',
+        'fz_remove_utf8',
+        'fz_argv_from_wargv',
+        'fz_free_argv',
+        'fz_stdods',
+        ]
+
+omit_methods = []
+
+
+def get_name_canonical( type_):
+    '''
+    Wrap Clang's clang.cindex.Type.get_canonical() to avoid returning anonymous
+    struct that clang spells as 'struct (unnamed at ...)'.
+    '''
+    if type_.spelling in ('size_t', 'int64_t'):
+        #jlib.log( 'Not canonicalising {self.spelling=}')
+        return type_
+    ret = type_.get_canonical()
+    if 'struct (unnamed' in ret.spelling:
+        jlib.log( 'Not canonicalising {type_.spelling=}')
+        ret = type_
+    return ret
+
+
+class State:
+    def __init__( self):
+        self.os_name = platform.system()
+        self.windows = (self.os_name == 'Windows' or self.os_name.startswith('CYGWIN'))
+        self.cygwin = self.os_name.startswith('CYGWIN')
+        self.openbsd = self.os_name == 'OpenBSD'
+        self.linux = self.os_name == 'Linux'
+        self.macos = self.os_name == 'Darwin'
+        self.pyodide = os.environ.get('OS') == 'pyodide'
+        self.have_done_build_0 = False
+
+        # Maps from <tu> to dict of fnname: cursor.
+        self.functions_cache = dict()
+
+        # Maps from <tu> to dict of dataname: cursor.
+        self.global_data = dict()
+
+        self.enums = dict()
+        self.structs = dict()
+
+        # Code should show extra information if state_.show_details(name)
+        # returns true.
+        #
+        self.show_details = lambda name: False
+
+    def functions_cache_populate( self, tu):
+        if tu in self.functions_cache:
+            return
+        fns = dict()
+        global_data = dict()
+        enums = dict()
+        structs = dict()
+
+        for cursor in parse.get_children(tu.cursor):
+            verbose = state_.show_details( cursor.spelling)
+            if verbose:
+                jlib.log('Looking at {cursor.spelling=} {cursor.kind=} {cursor.location=}')
+            if cursor.kind==clang.cindex.CursorKind.ENUM_DECL:
+                #jlib.log('ENUM_DECL: {cursor.spelling=}')
+                enum_values = list()
+                for cursor2 in cursor.get_children():
+                    #jlib.log('    {cursor2.spelling=}')
+                    name = cursor2.spelling
+                    enum_values.append(name)
+                enums[ get_name_canonical( cursor.type).spelling] = enum_values
+            if cursor.kind==clang.cindex.CursorKind.TYPEDEF_DECL:
+                name = cursor.spelling
+                if name.startswith( ( 'fz_', 'pdf_')):
+                    structs[ name] = cursor
+            if cursor.kind == clang.cindex.CursorKind.FUNCTION_DECL:
+                fnname = cursor.spelling
+                if self.show_details( fnname):
+                    jlib.log( 'Looking at {fnname=}')
+                if fnname in omit_fns:
+                    jlib.log1('{fnname=} is in omit_fns')
+                else:
+                    fns[ fnname] = cursor
+            if (cursor.kind == clang.cindex.CursorKind.VAR_DECL
+                    and cursor.linkage == clang.cindex.LinkageKind.EXTERNAL
+                    ):
+                global_data[ cursor.spelling] = cursor
+
+        self.functions_cache[ tu] = fns
+        self.global_data[ tu] = global_data
+        self.enums[ tu] = enums
+        self.structs[ tu] = structs
+        jlib.log1('Have populated fns and global_data. {len(enums)=} {len(self.structs)} {len(fns)=}')
+
+    def find_functions_starting_with( self, tu, name_prefix, method):
+        '''
+        Yields (name, cursor) for all functions in <tu> whose names start with
+        <name_prefix>.
+
+        method:
+            If true, we omit names that are in omit_methods
+        '''
+        self.functions_cache_populate( tu)
+        fn_to_cursor = self.functions_cache[ tu]
+        for fnname, cursor in fn_to_cursor.items():
+            verbose = state_.show_details( fnname)
+            if method and fnname in omit_methods:
+                if verbose:
+                    jlib.log('{fnname=} is in {omit_methods=}')
+                continue
+            if not fnname.startswith( name_prefix):
+                if 0 and verbose:
+                    jlib.log('{fnname=} does not start with {name_prefix=}')
+                continue
+            if verbose:
+                jlib.log('{name_prefix=} yielding {fnname=}')
+            yield fnname, cursor
+
+    def find_global_data_starting_with( self, tu, prefix):
+        for name, cursor in self.global_data[tu].items():
+            if name.startswith( prefix):
+                yield name, cursor
+
+    def find_function( self, tu, fnname, method):
+        '''
+        Returns cursor for function called <fnname> in <tu>, or None if not found.
+        '''
+        assert ' ' not in fnname, f'fnname={fnname}'
+        if method and fnname in omit_methods:
+            assert 0, f'method={method} fnname={fnname} omit_methods={omit_methods}'
+        self.functions_cache_populate( tu)
+        return self.functions_cache[ tu].get( fnname)
+
+
+
+state_ = State()
+
+
+def abspath(path):
+    '''
+    Like os.path.absath() but converts backslashes to forward slashes; this
+    simplifies things on Windows - allows us to use '/' as directory separator
+    when constructing paths, which is simpler than using os.sep everywhere.
+    '''
+    ret = os.path.abspath(path)
+    ret = ret.replace('\\', '/')
+    return ret
+
+
+class Cpu:
+    '''
+    For Windows only. Paths and names that depend on cpu.
+
+    Members:
+        .bits
+            .
+        .windows_subdir
+            '' or 'x64/', e.g. platform/win32/x64/Release.
+        .windows_name
+            'x86' or 'x64'.
+        .windows_config
+            'x64' or 'Win32', e.g. /Build Release|x64
+        .windows_suffix
+            '64' or '', e.g. mupdfcpp64.dll
+    '''
+    def __init__(self, name=None):
+        if name is None:
+            name = cpu_name()
+        self.name = name
+        if name == 'x32':
+            self.bits = 32
+            self.windows_subdir = ''
+            self.windows_name = 'x86'
+            self.windows_config = 'Win32'
+            self.windows_suffix = ''
+        elif name == 'x64':
+            self.bits = 64
+            self.windows_subdir = 'x64/'
+            self.windows_name = 'x64'
+            self.windows_config = 'x64'
+            self.windows_suffix = '64'
+        else:
+            assert 0, f'Unrecognised cpu name: {name}'
+
+    def __str__(self):
+        return self.name
+    def __repr__(self):
+        return f'Cpu:{self.name}'
+
+def python_version():
+    '''
+    Returns two-digit version number of Python as a string, e.g. '3.9'.
+    '''
+    ret = '.'.join(platform.python_version().split('.')[:2])
+    #jlib.log(f'returning ret={ret!r}')
+    return ret
+
+def cpu_name():
+    '''
+    Returns 'x32' or 'x64' depending on Python build.
+    '''
+    ret = f'x{32 if sys.maxsize == 2**31 - 1 else 64}'
+    #jlib.log(f'returning ret={ret!r}')
+    return ret
+
+def cmd_run_multiple(commands, prefix=None):
+    '''
+    Windows-only.
+
+    Runs multiple commands joined by &&, using cmd.exe if we are running under
+    Cygwin. We cope with commands that already contain double-quote characters.
+    '''
+    if state_.cygwin:
+        command = 'cmd.exe /V /C @ ' + ' "&&" '.join(commands)
+    else:
+        command = ' && '.join(commands)
+    jlib.system(command, verbose=1, out='log', prefix=prefix)
+
+
+class BuildDirs:
+    '''
+    Locations of various generated files.
+    '''
+    def __init__( self):
+
+        # Assume we are in mupdf/scripts/.
+        #jlib.log( f'platform.platform(): {platform.platform()}')
+        file_ = abspath( __file__)
+        assert file_.endswith( f'/scripts/wrap/state.py'), \
+                'Unexpected __file__=%s file_=%s' % (__file__, file_)
+        dir_mupdf = abspath( f'{file_}/../../../')
+        assert not dir_mupdf.endswith( '/')
+
+        # Directories used with --build.
+        self.dir_mupdf = dir_mupdf
+
+        # Directory used with --ref.
+        self.ref_dir = abspath( f'{self.dir_mupdf}/mupdfwrap_ref')
+        assert not self.ref_dir.endswith( '/')
+
+        self.set_dir_so( f'{self.dir_mupdf}/build/shared-release')
+
+    def set_dir_so( self, dir_so):
+        '''
+        Sets self.dir_so and also updates self.cpp_flags etc. Special case
+        `dir_so='-'` sets to None.
+        '''
+        if dir_so == '-':
+            self.dir_so = None
+            self.cpp_flags = None
+            return
+
+        dir_so = abspath( dir_so)
+        self.dir_so = dir_so
+
+        if state_.windows:
+            # debug builds have:
+            # /Od
+            # /D _DEBUG
+            # /RTC1
+            # /MDd
+            #
+            if 0: pass  # lgtm [py/unreachable-statement]
+            elif '-release' in dir_so:
+                self.cpp_flags = '/O2 /DNDEBUG'
+            elif '-debug' in dir_so:
+                # `/MDd` forces use of debug runtime and (i think via
+                # it setting `/D _DEBUG`) debug versions of things like
+                # `std::string` (incompatible with release builds). We also set
+                # `/Od` (no optimisation) and `/RTC1` (extra runtime checks)
+                # because these seem to be conventionally set in VS.
+                #
+                self.cpp_flags = '/MDd /Od /RTC1'
+            elif '-memento' in dir_so:
+                self.cpp_flags = '/MDd /Od /RTC1 /DMEMENTO'
+            else:
+                self.cpp_flags = None
+                jlib.log( 'Warning: unrecognised {dir_so=}, so cannot determine cpp_flags')
+        else:
+            if 0: pass  # lgtm [py/unreachable-statement]
+            elif '-debug' in dir_so:    self.cpp_flags = '-g'
+            elif '-release' in dir_so:  self.cpp_flags = '-O2 -DNDEBUG'
+            elif '-memento' in dir_so:  self.cpp_flags = '-g -DMEMENTO'
+            else:
+                self.cpp_flags = None
+                jlib.log( 'Warning: unrecognised {dir_so=}, so cannot determine cpp_flags')
+
+        # Set self.cpu and self.python_version.
+        if state_.windows:
+            # Infer cpu and python version from self.dir_so. And append current
+            # cpu and python version if not already present.
+            m = re.search( '-(x[0-9]+)-py([0-9.]+)$', self.dir_so)
+            if not m:
+                suffix = f'-{Cpu(cpu_name())}-py{python_version()}'
+                jlib.log('Adding suffix to {self.dir_so=}: {suffix!r}')
+                self.dir_so += suffix
+                m = re.search( '-(x[0-9]+)-py([0-9.]+)$', self.dir_so)
+                assert m
+            #log(f'self.dir_so={self.dir_so} {os.path.basename(self.dir_so)} m={m}')
+            assert m, f'Failed to parse dir_so={self.dir_so!r} - should be *-x32|x64-pyA.B'
+            self.cpu = Cpu( m.group(1))
+            self.python_version = m.group(2)
+            #jlib.log('{self.cpu=} {self.python_version=} {dir_so=}')
+        else:
+            # Use Python we are running under.
+            self.cpu = Cpu(cpu_name())
+            self.python_version = python_version()
+
+        # Set Py_LIMITED_API if it occurs in dir_so.
+        self.Py_LIMITED_API = None
+        flags = os.path.basename(self.dir_so).split('-')
+        for flag in flags:
+            if flag in ('Py_LIMITED_API', 'PLA'):
+                self.Py_LIMITED_API = '0x03080000'
+            elif flag.startswith('Py_LIMITED_API='):    # 2024-11-15: fixme: obsolete
+                self.Py_LIMITED_API = flag[len('Py_LIMITED_API='):]
+            elif flag.startswith('Py_LIMITED_API_'):
+                self.Py_LIMITED_API = flag[len('Py_LIMITED_API_'):]
+            elif flag.startswith('PLA_'):
+                self.Py_LIMITED_API = flag[len('PLA_'):]
+        jlib.log(f'{self.Py_LIMITED_API=}')
+
+        # Set swig .i and .cpp paths, including Py_LIMITED_API so that
+        # different values of Py_LIMITED_API can be tested without rebuilding
+        # unnecessarily.
+        Py_LIMITED_API_infix = f'-Py_LIMITED_API_{self.Py_LIMITED_API}' if self.Py_LIMITED_API else ''
+        self.mupdfcpp_swig_i    = lambda language: f'{self.dir_mupdf}/platform/{language}/mupdfcpp_swig{Py_LIMITED_API_infix}.i'
+        self.mupdfcpp_swig_cpp  = lambda language: self.mupdfcpp_swig_i(language) + '.cpp'
+
+    def windows_build_type(self):
+        '''
+        Returns `Release` or `Debug`.
+        '''
+        dir_so_flags = os.path.basename( self.dir_so).split( '-')
+        if 'debug' in dir_so_flags:
+            return 'Debug'
+        elif 'release' in dir_so_flags:
+            return 'Release'
+        else:
+            assert 0, f'Expecting "-release-" or "-debug-" in build_dirs.dir_so={self.dir_so}'