Mercurial > hgrepos > Python2 > PyMuPDF
diff scripts/sysinstall.py @ 3:2c135c81b16c
MERGE: upstream PyMuPDF 1.26.4 with MuPDF 1.26.7
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:44:09 +0200 |
| parents | 1d09e1dec1d9 |
| children |
line wrap: on
line diff
#! /usr/bin/env python3

'''
Test for Linux system install of MuPDF and PyMuPDF.

We build and install MuPDF and PyMuPDF into a root directory, then use
scripts/test.py to run PyMuPDF's pytest tests with LD_LIBRARY_PATH and
PYTHONPATH set.

PyMuPDF itself is installed using `python -m installer` with a wheel created
with `pip wheel`.

We run install commands with `sudo` if `--root /` is used.

Note that we run some commands with sudo; it's important that these use the
same python as non-sudo, otherwise things can be built and installed for
different python versions. For example when we are run from a github action, it
should not do `- uses: actions/setup-python@v5` but instead use whatever system
python is already defined.

Args:

    --gdb 0|1
        If 1, we pass `--gdb 1` to scripts/test.py. Default is 0.
    --mupdf-dir <mupdf_dir>
        Path of MuPDF checkout; default is 'mupdf'.
    --mupdf-do 0|1
        Whether to build and install mupdf. If 0, the commands of this stage
        (including git and system package commands) are only logged.
    --mupdf-git <git_args>
        Get or update `mupdf_dir` using git. If `mupdf_dir` already
        exists we run `git pull` in it; otherwise we run `git
        clone` with `<git_args> <mupdf_dir>`. For example:
            --mupdf-git "--branch master https://github.com/ArtifexSoftware/mupdf.git"
    --mupdf-so-mode <mode>
        Used with `install -m <mode> ...` when installing MuPDF. For example
        `--mupdf-so-mode 744`.
    --packages 0|1
        If 1 (the default) we install required system packages such as
        `libfreetype-dev`.
    --pip 0|venv|sudo
        Whether/how to install Python packages.
        If '0' we assume required packages are already available.
        If 'sudo' we install required Python packages using `sudo pip install
        ...`.
        If 'venv' (the default) we install Python packages and run installer
        and test commands inside venv's.
    --prefix <prefix>
        Directory within `root`; default is `/usr/local`. Must start with `/`.
    --pymupdf-dir <pymupdf_dir>
        Path of PyMuPDF checkout; default is 'PyMuPDF'.
    --pymupdf-do 0|1
        Whether to build and install pymupdf.
    --pytest-do 0|1
        Whether to run the commands of the test stage. If 0, they are only
        logged. Default is 1.
    --root <root>
        Root of install directory; default is 'pymupdf-sysinstall-test-root'.
    --tesseract5 0|1
        If 1 (the default), we force installation of libtesseract-dev version
        5 (which is not available as a default package in Ubuntu-22.04) from
        package repository ppa:alex-p/tesseract-ocr-devel.
    --test-venv <test_venv>
        Set the name of the venv in which we run tests (only with `--pip
        venv`); the default is a hard-coded venv name. The venv will be
        created, and required packages installed using `pip`.
    --use-installer 0|1
        If 1 (the default), we use `python -m installer` to install PyMuPDF
        from a generated wheel. [Otherwise we use `pip install`, which refuses
        to do a system install with `--root /`, referencing PEP-668.]
    -i <implementations>
        Passed through to scripts/test.py. Default is 'rR'.
    -f <test-fitz>
        Passed through to scripts/test.py. Default is '1'.
    -p <pytest-options>
        Passed through to scripts/test.py.
    -t <names>
        Passed through to scripts/test.py.

Extra args are read from environment variables
PYMUDF_SCRIPTS_SYSINSTALL_ARGS_PRE and PYMUDF_SCRIPTS_SYSINSTALL_ARGS_POST;
they are split with shlex and placed before and after the command-line args
respectively.

To show the commands of a stage instead of running them, specify `--mupdf-do
0`, `--pymupdf-do 0` and/or `--pytest-do 0`; the commands of that stage are
then logged as `## Would have run: ...`.
'''

import glob
import multiprocessing
import os
import platform
import shlex
import subprocess
import sys
import sysconfig

import test as test_py

pymupdf_dir = os.path.abspath( f'{__file__}/../..')

# Import pipcl from the PyMuPDF checkout, then restore sys.path.
sys.path.insert(0, pymupdf_dir)
import pipcl
del sys.path[0]

log = pipcl.log0

# Requirements for a system build and install:
#
# system packages (Debian names):
#
g_sys_packages = [
        'libfreetype-dev',
        'libgumbo-dev',
        'libharfbuzz-dev',
        'libjbig2dec-dev',
        'libjpeg-dev',
        'libleptonica-dev',
        'libopenjp2-7-dev',
        ]
# We also need libtesseract-dev version 5.
#


def main():
    '''
    Entry point: builds and installs MuPDF and PyMuPDF into a root directory,
    then runs PyMuPDF's tests against that install.

    Args are taken from PYMUDF_SCRIPTS_SYSINSTALL_ARGS_PRE, then sys.argv[1:],
    then PYMUDF_SCRIPTS_SYSINSTALL_ARGS_POST; see the module docstring for the
    supported options.

    Each stage (MuPDF, PyMuPDF, tests) runs its shell commands via a local
    `run()` that only logs the command if the stage's `--*-do` flag is 0.
    '''
    # Quote PATH so that `sudo PATH=...` still works if PATH contains spaces
    # or other shell-special characters; ordinary values are left unchanged.
    path_quoted = shlex.quote(os.environ["PATH"])

    log(f'## {__file__}: Starting.')
    log(f'{sys.executable=}')
    log(f'{platform.python_version()=}')
    log(f'{__file__=}')
    log(f'{os.environ.get("PYMUDF_SCRIPTS_SYSINSTALL_ARGS_PRE")=}')
    log(f'{os.environ.get("PYMUDF_SCRIPTS_SYSINSTALL_ARGS_POST")=}')
    log(f'{sys.argv=}')
    log(f'{sysconfig.get_path("platlib")=}')
    # Diagnostics only: show which python is found with and without sudo.
    run_command(f'python -V', check=0)
    run_command(f'python3 -V', check=0)
    run_command(f'sudo python -V', check=0)
    run_command(f'sudo python3 -V', check=0)
    run_command(f'sudo PATH={path_quoted} python -V', check=0)
    run_command(f'sudo PATH={path_quoted} python3 -V', check=0)

    if test_py.github_workflow_unimportant():
        return

    # Set default behaviour.
    #
    gdb = False
    use_installer = True
    mupdf_do = True
    mupdf_dir = 'mupdf'
    mupdf_git = None
    mupdf_so_mode = None
    packages = True
    prefix = '/usr/local'
    pymupdf_do = True
    root = 'pymupdf-sysinstall-test-root'
    tesseract5 = True
    pytest_args = None
    pytest_do = True
    pytest_name = None
    test_venv = 'venv-pymupdf-sysinstall-test'
    pip = 'venv'
    test_fitz = '1'
    test_implementations = 'rR'

    # Parse command-line.
    #
    env_args_pre = shlex.split(os.environ.get('PYMUDF_SCRIPTS_SYSINSTALL_ARGS_PRE', ''))
    env_args_post = shlex.split(os.environ.get('PYMUDF_SCRIPTS_SYSINSTALL_ARGS_POST', ''))
    args = iter(env_args_pre + sys.argv[1:] + env_args_post)

    def value():
        # Returns the value that follows the current option, failing with a
        # clear message (instead of a bare StopIteration) if it is missing.
        try:
            return next(args)
        except StopIteration:
            raise AssertionError(f'Missing value for arg: {arg!r}') from None

    for arg in args:
        if arg in ('-h', '--help'):
            log(__doc__)
            return
        elif arg == '--gdb':            gdb = int(value())
        elif arg == '--mupdf-do':       mupdf_do = int(value())
        elif arg == '--mupdf-dir':      mupdf_dir = value()
        elif arg == '--mupdf-git':      mupdf_git = value()
        elif arg == '--mupdf-so-mode':  mupdf_so_mode = value()
        elif arg == '--packages':       packages = int(value())
        elif arg == '--prefix':         prefix = value()
        elif arg == '--pymupdf-do':     pymupdf_do = int(value())
        elif arg == '--root':           root = value()
        elif arg == '--tesseract5':     tesseract5 = int(value())
        elif arg == '--pytest-do':      pytest_do = int(value())
        elif arg == '--test-venv':      test_venv = value()
        elif arg == '--use-installer':  use_installer = int(value())
        elif arg == '--pip':            pip = value()
        elif arg == '-f':               test_fitz = value()
        elif arg == '-i':               test_implementations = value()
        elif arg == '-p':               pytest_args = value()
        elif arg == '-t':               pytest_name = value()
        else:
            assert 0, f'Unrecognised arg: {arg!r}'

    assert prefix.startswith('/')
    pip_values = ('0', 'sudo', 'venv')
    assert pip in pip_values, f'Unrecognised --pip value {pip!r} should be one of: {pip_values!r}'
    root = os.path.abspath(root)
    root_prefix = f'{root}{prefix}'.replace('//', '/')

    # Install commands need sudo only when installing into the real root.
    sudo = ''
    if root == '/':
        sudo = f'sudo PATH={path_quoted} '

    def run(command, env_extra=None):
        return run_command(command, doit=mupdf_do, env_extra=env_extra)

    # Get MuPDF from git if specified.
    #
    if mupdf_git:
        # Update existing checkout or do `git clone`.
        if os.path.exists(mupdf_dir):
            log(f'## Update MuPDF checkout {mupdf_dir}.')
            run(f'cd {mupdf_dir} && git pull && git submodule update --init')
        else:
            # No existing git checkout, so do a fresh clone.
            log(f'## Clone MuPDF into {mupdf_dir}.')
            run(f'git clone --recursive --depth 1 --shallow-submodules {mupdf_git} {mupdf_dir}')

    if packages:
        # Install required system packages. We assume a Debian package system.
        #
        log('## Install system packages required by MuPDF.')
        run(f'sudo apt update')
        run(f'sudo apt install {" ".join(g_sys_packages)}')
        # Ubuntu-22.04 has freeglut3-dev, not libglut-dev, so fall back to the
        # former only if installing the latter fails. (This must be `||`; a
        # single `|` would run both installs at once as a pipeline.)
        run(f'sudo apt install libglut-dev || sudo apt install freeglut3-dev')
        if tesseract5:
            log(f'## Force installation of libtesseract-dev version 5.')
            # https://stackoverflow.com/questions/76834972/how-can-i-run-pytesseract-python-library-in-ubuntu-22-04
            #
            run('sudo apt install -y software-properties-common')
            run('sudo add-apt-repository ppa:alex-p/tesseract-ocr-devel')
            run('sudo apt update')
            run('sudo apt install -y libtesseract-dev')
        else:
            run('sudo apt install libtesseract-dev')

    # Build+install MuPDF. We use mupdf:Makefile's install-shared-python target.
    #
    if pip == 'sudo':
        log('## Installing Python packages required for building MuPDF and PyMuPDF.')
        #run(f'sudo pip install --upgrade pip')  # Breaks on Github see: https://github.com/pypa/get-pip/issues/226.
        # We need to install psutil and pillow as system packages, otherwise things like `import psutil`
        # fail, seemingly because of pip warning:
        #
        #   WARNING: Running pip as the 'root' user can result in broken
        #   permissions and conflicting behaviour with the system package
        #   manager. It is recommended to use a virtual environment instead:
        #   https://pip.pypa.io/warnings/venv
        #
        names = test_py.wrap_get_requires_for_build_wheel(f'{__file__}/../..')
        names = names.split(' ')
        names = [n for n in names if n not in ('psutil', 'pillow')]
        names = ' '.join(names)
        run(f'sudo pip install {names}')
        run(f'sudo apt install python3-psutil python3-pillow')

    log('## Build and install MuPDF.')
    command = f'cd {mupdf_dir}'
    command += f' && {sudo}make'
    command += f' -j {multiprocessing.cpu_count()}'
    #command += f' EXE_LDFLAGS=-Wl,--trace' # Makes linker generate diagnostics as it runs.
    command += f' DESTDIR={root}'
    command += f' HAVE_LEPTONICA=yes'
    command += f' HAVE_TESSERACT=yes'
    command += f' USE_SYSTEM_LIBS=yes'
    # We need latest zxingcpp so system version not ok.
    command += f' USE_SYSTEM_ZXINGCPP=no'
    command += f' barcode=yes'
    command += f' VENV_FLAG={"--venv" if pip == "venv" else ""}'
    if mupdf_so_mode:
        command += f' SO_INSTALL_MODE={mupdf_so_mode}'
    command += f' build_prefix=system-libs-'
    command += f' prefix={prefix}'
    command += f' verbose=yes'
    command += f' install-shared-python'
    command += f' INSTALL_MODE=755'
    run( command)

    # Build+install PyMuPDF.
    #
    log('## Build and install PyMuPDF.')

    def run(command):
        return run_command(command, doit=pymupdf_do)

    # Note that this is always run, regardless of --pymupdf-do.
    flags_freetype2 = run_command('pkg-config --cflags freetype2', capture=1)
    compile_flags = f'-I {root_prefix}/include {flags_freetype2}'
    link_flags = f'-L {root_prefix}/lib'
    env = ''
    env += f'CFLAGS="{compile_flags}" '
    env += f'CXXFLAGS="{compile_flags}" '
    env += f'LDFLAGS="{link_flags}" '
    env += f'PYMUPDF_SETUP_MUPDF_BUILD= '   # Use system MuPDF.
    if use_installer:
        log(f'## Building wheel.')
        if pip == 'venv':
            venv_name = 'venv-pymupdf-sysinstall'
        run(f'pwd')
        run(f'rm dist/* || true')
        if pip == 'venv':
            run(f'{sys.executable} -m venv {venv_name}')
            run(f'. {venv_name}/bin/activate && pip install --upgrade pip')
            run(f'. {venv_name}/bin/activate && pip install --upgrade installer')
            run(f'{env} {venv_name}/bin/python -m pip wheel -vv -w dist {os.path.abspath(pymupdf_dir)}')
        elif pip == 'sudo':
            #run(f'sudo pip install --upgrade pip')  # Breaks on Github see: https://github.com/pypa/get-pip/issues/226.
            run(f'sudo pip install installer')
            run(f'{env} pip wheel -vv -w dist {os.path.abspath(pymupdf_dir)}')
        else:
            # With `--pip 0` we assume `installer` is already available, but
            # we still have to build the wheel, otherwise there is nothing in
            # dist/ to install.
            log(f'Not installing "installer" because {pip=}.')
            run(f'{env} pip wheel -vv -w dist {os.path.abspath(pymupdf_dir)}')
        wheel = glob.glob(f'dist/*')
        if pymupdf_do:
            assert len(wheel) == 1, f'{wheel=}'
            wheel = wheel[0]
        else:
            # Nothing was built in this run; the commands below are only
            # logged, so fall back to a placeholder rather than failing.
            wheel = wheel[0] if len(wheel) == 1 else 'dist/*'
        log(f'## Installing wheel using `installer`.')
        pv = '.'.join(platform.python_version_tuple()[:2])
        p = f'{root_prefix}/lib/python{pv}'
        # `python -m installer` fails to overwrite existing files.
        stale_paths = (
                f'{p}/site-packages/pymupdf',
                f'{p}/site-packages/pymupdf.py',
                f'{p}/site-packages/fitz',
                f'{p}/site-packages/fitz.py',
                f'{p}/site-packages/pymupdf-*.dist-info',
                f'{root_prefix}/bin/pymupdf',
                )
        for stale_path in stale_paths:
            run(f'{sudo}rm -r {stale_path} || true')
        if pip == 'venv':
            run(f'{sudo}{venv_name}/bin/python -m installer --destdir {root} --prefix {prefix} {wheel}')
        else:
            run(f'{sudo}{sys.executable} -m installer --destdir {root} --prefix {prefix} {wheel}')
        # It seems that MuPDF Python bindings are installed into
        # `.../dist-packages` (from mupdf:Makefile's call of `$(shell python3
        # -c "import sysconfig; print(sysconfig.get_path('platlib'))")` while
        # `python -m installer` installs PyMuPDF into `.../site-packages`.
        #
        # This might be because `sysconfig.get_path('platlib')` returns
        # `.../site-packages` if run in a venv, otherwise `.../dist-packages`.
        #
        # And on github ubuntu-latest, sysconfig.get_path("platlib") is
        # /opt/hostedtoolcache/Python/3.11.7/x64/lib/python3.11/site-packages
        #
        # So we set pythonpath (used later) to import from all
        # `pythonX.Y/site-packages/` and `pythonX.Y/dist-packages` directories
        # within `root_prefix`:
        #
        pv = platform.python_version().split('.')
        pv = f'python{pv[0]}.{pv[1]}'
        pythonpath = list()
        for dirpath, dirnames, filenames in os.walk(root_prefix):
            if os.path.basename(dirpath) == pv:
                for leaf in 'site-packages', 'dist-packages':
                    if leaf in dirnames:
                        pythonpath.append(os.path.join(dirpath, leaf))
        pythonpath = ':'.join(pythonpath)
        log(f'{pythonpath=}')
    else:
        command = f'{env} pip install -vv --root {root} {os.path.abspath(pymupdf_dir)}'
        run( command)
        pythonpath = pipcl.install_dir(root)

    # Show contents of installation directory. This is very slow on github,
    # where /usr/local contains lots of things.
    #run(f'find {root_prefix}|sort')

    # Run pytest tests.
    #
    log('## Run PyMuPDF pytest tests.')

    def run(command, env_extra=None):
        return run_command(command, doit=pytest_do, env_extra=env_extra, caller=1)

    import gh_release
    if pip == 'venv':
        # Create venv.
        run(f'{sys.executable} -m venv {test_venv}')
        # Install required packages.
        command = f'. {test_venv}/bin/activate'
        command += f' && pip install --upgrade pip'
        command += f' && pip install --upgrade {gh_release.test_packages}'
        run(command)
    elif pip == 'sudo':
        # psutil and pillow are excluded here; with `--pip sudo` they are
        # installed as system packages earlier.
        names = gh_release.test_packages
        names = names.split(' ')
        names = [n for n in names if n not in ('psutil', 'pillow')]
        names = ' '.join(names)
        run(f'sudo pip install --upgrade {names}')
    else:
        log(f'Not installing packages for testing because {pip=}.')
    # Run pytest.
    #
    # We need to set PYTHONPATH and LD_LIBRARY_PATH. In particular we
    # use pipcl.install_dir() to find where pipcl will have installed
    # PyMuPDF.
    command = ''
    if pip == 'venv':
        command += f'. {test_venv}/bin/activate &&'
    command += f' LD_LIBRARY_PATH={root_prefix}/lib PYTHONPATH={pythonpath} PATH=$PATH:{root_prefix}/bin'
    run(f'ls -l {root_prefix}/bin/')
    # 2024-03-20: Not sure whether/where `pymupdf` binary is installed, so we
    # disable the test_cli* tests.
    command += f' {pymupdf_dir}/scripts/test.py'
    if gdb:
        command += ' --gdb 1'
    command += f' -v 0'
    if pytest_name is None:
        # No specific tests requested, so run everything except these.
        excluded_tests = (
                'test_color_count',
                'test_3050',
                'test_cli',
                'test_cli_out',
                'test_pylint',
                'test_textbox3',
                'test_3493',
                'test_4180',
                )
        excluded_tests = ' and not '.join(excluded_tests)
        if not pytest_args:
            pytest_args = ''
        pytest_args += f' -k \'not {excluded_tests}\''
    else:
        command += f' -t {pytest_name}'
    if test_fitz:
        command += f' -f {test_fitz}'
    if test_implementations:
        command += f' -i {test_implementations}'
    if pytest_args:
        command += f' -p {shlex.quote(pytest_args)}'
    if pytest_do:
        command += ' test'
    run(command, env_extra=dict(PYMUPDF_SYSINSTALL_TEST='1'))


def run_command(command, capture=False, check=True, doit=True, env_extra=None, caller=0):
    '''
    Runs a shell command with pipcl.run(), or only logs it.

    Args:
        command:
            The command to run.
        capture, check, env_extra:
            Passed through to pipcl.run().
        doit:
            If true we run the command and return whatever pipcl.run()
            returns. Otherwise we log `## Would have run: <command>` and
            return None.
        caller:
            Extra number of stack frames between us and the code that should
            be reported as the caller; we pass `caller+2` to pipcl.run().
    '''
    if doit:
        return pipcl.run(command, capture=capture, check=check, caller=caller+2, env_extra=env_extra)
    else:
        log(f'## Would have run: {command}', caller=2)


if __name__ == '__main__':
    main()
