comparison scripts/sysinstall.py @ 3:2c135c81b16c

MERGE: upstream PyMuPDF 1.26.4 with MuPDF 1.26.7
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:44:09 +0200
parents 1d09e1dec1d9
children
comparison
equal deleted inserted replaced
0:6015a75abc2d 3:2c135c81b16c
1 #! /usr/bin/env python3
2
3 '''
4 Test for Linux system install of MuPDF and PyMuPDF.
5
6 We build and install MuPDF and PyMuPDF into a root directory, then use
7 scripts/test.py to run PyMuPDF's pytest tests with LD_LIBRARY_PATH and
8 PYTHONPATH set.
9
10 PyMuPDF itself is installed using `python -m installer` with a wheel created with
11 `pip wheel`.
12
13 We run install commands with `sudo` if `--root /` is used.
14
15 Note that we run some commands with sudo; it's important that these use the
16 same python as non-sudo, otherwise things can be built and installed for
17 different python versions. For example when we are run from a github action, it
18 should not do `- uses: actions/setup-python@v5` but instead use whatever system
19 python is already defined.
20
21 Args:
22
23 --gdb 0|1
24 --mupdf-dir <mupdf_dir>
25 Path of MuPDF checkout; default is 'mupdf'.
26 --mupdf-do 0|1
27 Whether to build and install mupdf.
28 --mupdf-git <git_args>
29 Get or update `mupdf_dir` using git. If `mupdf_dir` already
30 exists we run `git pull` in it; otherwise we run `git
31 clone` with `<git_args> <mupdf_dir>`. For example:
32 --mupdf-git "--branch master https://github.com/ArtifexSoftware/mupdf.git"
33 --mupdf-so-mode <mode>
34 Used with `install -m <mode> ...` when installing MuPDF. For example
35 `--mupdf-so-mode 744`.
36 --packages 0|1
37 If 1 (the default) we install required system packages such as
38 `libfreetype-dev`.
39 --pip 0|venv|sudo
40 Whether/how to install Python packages.
41 If '0' we assume required packages are already available.
42 If 'sudo' we install required Python packages using `sudo pip install
43 ...`.
44 If 'venv' (the default) we install Python packages and run installer
45 and test commands inside venv's.
46 --prefix:
47 Directory within `root`; default is `/usr/local`. Must start with `/`.
48 --pymupdf-dir <pymupdf_dir>
49 Path of PyMuPDF checkout. [Not currently parsed by this script; the checkout containing this script is always used.]
50 --pymupdf-do 0|1
51 Whether to build and install pymupdf.
52 --root <root>
53 Root of install directory; default is 'pymupdf-sysinstall-test-root'.
54 --tesseract5 0|1
55 If 1 (the default), we force installation of libtesseract-dev version
56 5 (which is not available as a default package in Ubuntu-22.04) from
57 package repository ppa:alex-p/tesseract-ocr-devel.
58 --test-venv <test_venv>
59 Set the name of the venv in which we run tests (only with `--pip
60 venv`); the default is a hard-coded venv name. The venv will be
61 created, and required packages installed using `pip`.
62 --use-installer 0|1
63 If 1 (the default), we use `python -m installer` to install PyMuPDF
64 from a generated wheel. [Otherwise we use `pip install`, which refuses
65 to do a system install with `--root /`, referencing PEP-668.]
66 -i <implementations>
67 Passed through to scripts/test.py. Default is 'rR'.
68 -f <test-fitz>
69 Passed through to scripts/test.py. Default is '1'.
70 -p <pytest-options>
71 Passed through to scripts/test.py.
72 -t <names>
73 Passed through to scripts/test.py.
74
75 To only show what commands would be run, but not actually run them, specify
76 `--mupdf-do 0 --pymupdf-do 0 --pytest-do 0`.
77 '''
78
79 import glob
80 import multiprocessing
81 import os
82 import platform
83 import shlex
84 import subprocess
85 import sys
86 import sysconfig
87
88 import test as test_py
89
90 pymupdf_dir = os.path.abspath( f'{__file__}/../..')
91
92 sys.path.insert(0, pymupdf_dir)
93 import pipcl
94 del sys.path[0]
95
96 log = pipcl.log0
97
# Debian names of the system packages required for a system build and install.
#
g_sys_packages = [
        'libfreetype-dev', 'libgumbo-dev', 'libharfbuzz-dev', 'libjbig2dec-dev',
        'libjpeg-dev', 'libleptonica-dev', 'libopenjp2-7-dev',
        ]
# libtesseract-dev version 5 is also required; it is not part of this list.
#
113
114
def main():
    '''
    Build and install MuPDF and PyMuPDF into `root`, then run PyMuPDF's tests.

    Arguments are taken, in this order, from the environment variable
    PYMUDF_SCRIPTS_SYSINSTALL_ARGS_PRE, from sys.argv[1:] and from the
    environment variable PYMUDF_SCRIPTS_SYSINSTALL_ARGS_POST (the variable
    names really are spelled `PYMUDF`). See the module docstring for the
    individual arguments.

    The work is done in three stages. Within each stage, commands are issued
    through a local `run()` helper which only logs the command if the stage
    is disabled:

        --mupdf-do:
            Optionally clone/update MuPDF, install system and Python
            packages, and build+install MuPDF.
        --pymupdf-do:
            Build a PyMuPDF wheel and install it (or use `pip install
            --root` if `--use-installer 0`).
        --pytest-do:
            Prepare the test environment and run scripts/test.py.

    Raises AssertionError for unrecognised arguments or invalid `--prefix` /
    `--pip` values.
    '''
    # Quote PATH once, so that a PATH containing spaces or shell
    # metacharacters survives being embedded in `sudo PATH=... <command>`.
    path_quoted = shlex.quote(os.environ["PATH"])

    if 1:
        # Diagnostics about the python(s) that we and sudo will use.
        log(f'## {__file__}: Starting.')
        log(f'{sys.executable=}')
        log(f'{platform.python_version()=}')
        log(f'{__file__=}')
        log(f'{os.environ.get("PYMUDF_SCRIPTS_SYSINSTALL_ARGS_PRE")=}')
        log(f'{os.environ.get("PYMUDF_SCRIPTS_SYSINSTALL_ARGS_POST")=}')
        log(f'{sys.argv=}')
        log(f'{sysconfig.get_path("platlib")=}')
        run_command('python -V', check=0)
        run_command('python3 -V', check=0)
        run_command('sudo python -V', check=0)
        run_command('sudo python3 -V', check=0)
        run_command(f'sudo PATH={path_quoted} python -V', check=0)
        run_command(f'sudo PATH={path_quoted} python3 -V', check=0)

    if test_py.github_workflow_unimportant():
        return

    # Set default behaviour.
    #
    gdb = False
    use_installer = True
    mupdf_do = True
    mupdf_dir = 'mupdf'
    mupdf_git = None
    mupdf_so_mode = None
    packages = True
    prefix = '/usr/local'
    pymupdf_do = True
    root = 'pymupdf-sysinstall-test-root'
    tesseract5 = True
    pytest_args = None
    pytest_do = True
    pytest_name = None
    test_venv = 'venv-pymupdf-sysinstall-test'
    pip = 'venv'
    test_fitz = '1'
    test_implementations = 'rR'

    # Parse command-line.
    #
    env_args_pre = shlex.split(os.environ.get('PYMUDF_SCRIPTS_SYSINSTALL_ARGS_PRE', ''))
    env_args_post = shlex.split(os.environ.get('PYMUDF_SCRIPTS_SYSINSTALL_ARGS_POST', ''))
    args = iter(env_args_pre + sys.argv[1:] + env_args_post)
    for arg in args:
        # Each option consumes its value from the same iterator.
        if arg in ('-h', '--help'):
            log(__doc__)
            return
        elif arg == '--gdb':            gdb = int(next(args))
        elif arg == '--mupdf-do':       mupdf_do = int(next(args))
        elif arg == '--mupdf-dir':      mupdf_dir = next(args)
        elif arg == '--mupdf-git':      mupdf_git = next(args)
        elif arg == '--mupdf-so-mode':  mupdf_so_mode = next(args)
        elif arg == '--packages':       packages = int(next(args))
        elif arg == '--prefix':         prefix = next(args)
        elif arg == '--pymupdf-do':     pymupdf_do = int(next(args))
        elif arg == '--root':           root = next(args)
        elif arg == '--tesseract5':     tesseract5 = int(next(args))
        elif arg == '--pytest-do':      pytest_do = int(next(args))
        elif arg == '--test-venv':      test_venv = next(args)
        elif arg == '--use-installer':  use_installer = int(next(args))
        elif arg == '--pip':            pip = next(args)
        elif arg == '-f':               test_fitz = next(args)
        elif arg == '-i':               test_implementations = next(args)
        elif arg == '-p':               pytest_args = next(args)
        elif arg == '-t':               pytest_name = next(args)
        else:
            assert 0, f'Unrecognised arg: {arg!r}'

    assert prefix.startswith('/')
    pip_values = ('0', 'sudo', 'venv')
    assert pip in pip_values, f'Unrecognised --pip value {pip!r} should be one of: {pip_values!r}'
    root = os.path.abspath(root)
    root_prefix = f'{root}{prefix}'.replace('//', '/')

    # Install commands need sudo only when installing into the real root.
    sudo = ''
    if root == '/':
        sudo = f'sudo PATH={path_quoted} '

    def drop_apt_provided(names):
        # `names` is a space-separated string of package names. psutil and
        # pillow are excluded because they are installed as system packages
        # with apt when `--pip sudo` is used.
        return ' '.join(n for n in names.split(' ') if n not in ('psutil', 'pillow'))

    def run(command, env_extra=None):
        return run_command(command, doit=mupdf_do, env_extra=env_extra)

    # Get MuPDF from git if specified.
    #
    if mupdf_git:
        # Update existing checkout or do `git clone`.
        if os.path.exists(mupdf_dir):
            log(f'## Update MuPDF checkout {mupdf_dir}.')
            run(f'cd {mupdf_dir} && git pull && git submodule update --init')
        else:
            # No existing git checkout, so do a fresh clone.
            log(f'## Clone MuPDF into {mupdf_dir}.')
            run(f'git clone --recursive --depth 1 --shallow-submodules {mupdf_git} {mupdf_dir}')

    if packages:
        # Install required system packages. We assume a Debian package system.
        #
        log('## Install system packages required by MuPDF.')
        run('sudo apt update')
        run(f'sudo apt install {" ".join(g_sys_packages)}')
        # Ubuntu-22.04 has freeglut3-dev, not libglut-dev. We use `||` so
        # that freeglut3-dev is only tried if libglut-dev could not be
        # installed; a plain pipe would run both apt commands concurrently.
        run('sudo apt install libglut-dev || sudo apt install freeglut3-dev')
        if tesseract5:
            log('## Force installation of libtesseract-dev version 5.')
            # https://stackoverflow.com/questions/76834972/how-can-i-run-pytesseract-python-library-in-ubuntu-22-04
            #
            run('sudo apt install -y software-properties-common')
            run('sudo add-apt-repository ppa:alex-p/tesseract-ocr-devel')
            run('sudo apt update')
            run('sudo apt install -y libtesseract-dev')
        else:
            run('sudo apt install libtesseract-dev')

    # Build+install MuPDF. We use mupdf:Makefile's install-shared-python target.
    #
    if pip == 'sudo':
        log('## Installing Python packages required for building MuPDF and PyMuPDF.')
        #run(f'sudo pip install --upgrade pip')  # Breaks on Github see: https://github.com/pypa/get-pip/issues/226.
        # We need to install psutil and pillow as system packages, otherwise things like `import psutil`
        # fail, seemingly because of pip warning:
        #
        #   WARNING: Running pip as the 'root' user can result in broken
        #   permissions and conflicting behaviour with the system package
        #   manager. It is recommended to use a virtual environment instead:
        #   https://pip.pypa.io/warnings/venv
        #
        names = test_py.wrap_get_requires_for_build_wheel(f'{__file__}/../..')
        names = drop_apt_provided(names)
        run(f'sudo pip install {names}')
        run('sudo apt install python3-psutil python3-pillow')

    log('## Build and install MuPDF.')
    command = f'cd {mupdf_dir}'
    command += f' && {sudo}make'
    command += f' -j {multiprocessing.cpu_count()}'
    #command += f' EXE_LDFLAGS=-Wl,--trace' # Makes linker generate diagnostics as it runs.
    command += f' DESTDIR={root}'
    command += ' HAVE_LEPTONICA=yes'
    command += ' HAVE_TESSERACT=yes'
    command += ' USE_SYSTEM_LIBS=yes'
    # We need latest zxingcpp so system version not ok.
    command += ' USE_SYSTEM_ZXINGCPP=no'
    command += ' barcode=yes'
    command += f' VENV_FLAG={"--venv" if pip == "venv" else ""}'
    if mupdf_so_mode:
        command += f' SO_INSTALL_MODE={mupdf_so_mode}'
    command += ' build_prefix=system-libs-'
    command += f' prefix={prefix}'
    command += ' verbose=yes'
    command += ' install-shared-python'
    command += ' INSTALL_MODE=755'
    run(command)

    # Build+install PyMuPDF.
    #
    log('## Build and install PyMuPDF.')

    def run(command):
        return run_command(command, doit=pymupdf_do)

    # Strip the captured output so that a trailing newline does not end up
    # inside the quoted CFLAGS/CXXFLAGS values.
    flags_freetype2 = run_command('pkg-config --cflags freetype2', capture=1).strip()
    compile_flags = f'-I {root_prefix}/include {flags_freetype2}'
    link_flags = f'-L {root_prefix}/lib'
    env = ''
    env += f'CFLAGS="{compile_flags}" '
    env += f'CXXFLAGS="{compile_flags}" '
    env += f'LDFLAGS="{link_flags}" '
    env += 'PYMUPDF_SETUP_MUPDF_BUILD= '    # Use system MuPDF.
    if use_installer:
        log('## Building wheel.')
        # Only used when pip == 'venv'.
        venv_name = 'venv-pymupdf-sysinstall'
        run('pwd')
        # Remove old wheels; we require exactly one wheel in dist/ below.
        run('rm dist/* || true')
        if pip == 'venv':
            run(f'{sys.executable} -m venv {venv_name}')
            run(f'. {venv_name}/bin/activate && pip install --upgrade pip')
            run(f'. {venv_name}/bin/activate && pip install --upgrade installer')
            run(f'{env} {venv_name}/bin/python -m pip wheel -vv -w dist {os.path.abspath(pymupdf_dir)}')
        elif pip == 'sudo':
            #run(f'sudo pip install --upgrade pip')  # Breaks on Github see: https://github.com/pypa/get-pip/issues/226.
            run('sudo pip install installer')
            run(f'{env} pip wheel -vv -w dist {os.path.abspath(pymupdf_dir)}')
        else:
            log(f'Not installing "installer" because {pip=}.')
        wheel = glob.glob('dist/*')
        assert len(wheel) == 1, f'{wheel=}'
        wheel = wheel[0]
        log('## Installing wheel using `installer`.')
        pv = '.'.join(platform.python_version_tuple()[:2])
        python_xy = f'python{pv}'
        p = f'{root_prefix}/lib/{python_xy}'
        # `python -m installer` fails to overwrite existing files.
        run(f'{sudo}rm -r {p}/site-packages/pymupdf || true')
        run(f'{sudo}rm -r {p}/site-packages/pymupdf.py || true')
        run(f'{sudo}rm -r {p}/site-packages/fitz || true')
        run(f'{sudo}rm -r {p}/site-packages/fitz.py || true')
        run(f'{sudo}rm -r {p}/site-packages/pymupdf-*.dist-info || true')
        run(f'{sudo}rm -r {root_prefix}/bin/pymupdf || true')
        if pip == 'venv':
            run(f'{sudo}{venv_name}/bin/python -m installer --destdir {root} --prefix {prefix} {wheel}')
        else:
            run(f'{sudo}{sys.executable} -m installer --destdir {root} --prefix {prefix} {wheel}')
        # It seems that MuPDF Python bindings are installed into
        # `.../dist-packages` (from mupdf:Makefile's call of `$(shell python3
        # -c "import sysconfig; print(sysconfig.get_path('platlib'))")` while
        # `python -m installer` installs PyMuPDF into `.../site-packages`.
        #
        # This might be because `sysconfig.get_path('platlib')` returns
        # `.../site-packages` if run in a venv, otherwise `.../dist-packages`.
        #
        # And on github ubuntu-latest, sysconfig.get_path("platlib") is
        # /opt/hostedtoolcache/Python/3.11.7/x64/lib/python3.11/site-packages
        #
        # So we set pythonpath (used later) to import from all
        # `pythonX.Y/site-packages/` and `pythonX.Y/dist-packages` directories
        # within `root_prefix`:
        #
        pythonpath = list()
        for dirpath, dirnames, _ in os.walk(root_prefix):
            if os.path.basename(dirpath) == python_xy:
                for leaf in 'site-packages', 'dist-packages':
                    if leaf in dirnames:
                        pythonpath.append(os.path.join(dirpath, leaf))
        pythonpath = ':'.join(pythonpath)
        log(f'{pythonpath=}')
    else:
        command = f'{env} pip install -vv --root {root} {os.path.abspath(pymupdf_dir)}'
        run(command)
        pythonpath = pipcl.install_dir(root)

    # Show contents of installation directory. This is very slow on github,
    # where /usr/local contains lots of things.
    #run(f'find {root_prefix}|sort')

    # Run pytest tests.
    #
    log('## Run PyMuPDF pytest tests.')

    def run(command, env_extra=None):
        return run_command(command, doit=pytest_do, env_extra=env_extra, caller=1)

    import gh_release
    if pip == 'venv':
        # Create venv.
        run(f'{sys.executable} -m venv {test_venv}')
        # Install required packages.
        command = f'. {test_venv}/bin/activate'
        command += ' && pip install --upgrade pip'
        command += f' && pip install --upgrade {gh_release.test_packages}'
        run(command)
    elif pip == 'sudo':
        names = drop_apt_provided(gh_release.test_packages)
        run(f'sudo pip install --upgrade {names}')
    else:
        log(f'Not installing packages for testing because {pip=}.')
    # Run pytest.
    #
    # We need to set PYTHONPATH and LD_LIBRARY_PATH so that the installed
    # PyMuPDF and MuPDF are found; `pythonpath` was determined above.
    command = ''
    if pip == 'venv':
        command += f'. {test_venv}/bin/activate &&'
    command += f' LD_LIBRARY_PATH={root_prefix}/lib PYTHONPATH={pythonpath} PATH=$PATH:{root_prefix}/bin'
    run(f'ls -l {root_prefix}/bin/')
    # 2024-03-20: Not sure whether/where `pymupdf` binary is installed, so we
    # disable the test_cli* tests.
    command += f' {pymupdf_dir}/scripts/test.py'
    if gdb:
        command += ' --gdb 1'
    command += ' -v 0'
    if pytest_name is None:
        # No specific tests requested, so run everything except these.
        excluded_tests = (
                'test_color_count',
                'test_3050',
                'test_cli',
                'test_cli_out',
                'test_pylint',
                'test_textbox3',
                'test_3493',
                'test_4180',
                )
        excluded_tests = ' and not '.join(excluded_tests)
        if not pytest_args:
            pytest_args = ''
        pytest_args += f' -k \'not {excluded_tests}\''
    else:
        command += f' -t {pytest_name}'
    if test_fitz:
        command += f' -f {test_fitz}'
    if test_implementations:
        command += f' -i {test_implementations}'
    if pytest_args:
        command += f' -p {shlex.quote(pytest_args)}'
    if pytest_do:
        command += ' test'
    run(command, env_extra=dict(PYMUPDF_SYSINSTALL_TEST='1'))
420
421
def run_command(command, capture=False, check=True, doit=True, env_extra=None, caller=0):
    '''
    Runs `command` with `pipcl.run()`, or only logs it if `doit` is false.

    Args:
        command:
            The command to run.
        capture, check, env_extra:
            Passed through to `pipcl.run()`.
        doit:
            If false we do not run the command, and instead log what would
            have been run.
        caller:
            Extra stack offset; we pass `caller+2` to `pipcl.run()`.

    Returns whatever `pipcl.run()` returns, or None if `doit` is false.
    '''
    if not doit:
        log(f'## Would have run: {command}', caller=2)
        return None
    return pipcl.run(command, capture=capture, check=check, caller=caller+2, env_extra=env_extra)
427
428
429 if __name__ == '__main__':
430 main()