comparison mupdf-source/setup.py @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 #!/usr/bin/env python3
2
3 '''
4 Installation script for MuPDF Python bindings, using scripts/pipcl.py.
5
6 Notes:
7
8 When building an sdist (e.g. with 'pip sdist'), we use clang-python to
9 generate C++ source which is then included in the sdist.
10
11 This allows wheels to be built from an sdist without requiring clang-python
12 to be installed.
13
14
Internal testing only - environment variables:
16
17 MUPDF_SETUP_BUILD_DIR
18 Overrides the default build directory.
19
20 MUPDF_SETUP_USE_CLANG_PYTHON
21 Affects whether we use clang-python when building.
22
23 If set, must be '0' or '1', and we override the default and do not
24 ('0') / do ('1') use clang-python to generate C++ source code from
25 MuPDF headers.
26
27 If we are an sdist we default to not re-generating C++ - the generated
28 files will be already available in platform/c++/. Otherwise we default
29 to generating C++ source code.
30
31 MUPDF_SETUP_USE_SWIG
32 If set, must be '0' or '1', and we do not ('0') / do ('1') attempt to
33 run swig.
34 '''
35
36 import os
37 import platform
38 import re
39 import subprocess
40 import sys
41 import time
42
43
def log(text=''):
    '''
    Writes <text> to stdout with every line prefixed by 'mupdf:setup.py: ',
    then flushes stdout.
    '''
    prefixed = [f'mupdf:setup.py: {line}' for line in text.split('\n')]
    print('\n'.join(prefixed))
    sys.stdout.flush()
48
def cache(function):
    '''
    Simple caching decorator.

    The wrapped function is called at most once for each distinct tuple of
    positional arguments; later calls with the same arguments return the
    stored result. The arguments are used as a dictionary key so they must
    be hashable. Keyword arguments are not supported.
    '''
    import functools

    results = {}

    @functools.wraps(function)
    def wrapper(*args):
        if args not in results:
            # Forward the arguments to the wrapped function. Calling it with
            # no arguments would raise TypeError for any function that takes
            # parameters, even though the result is keyed on <args>.
            results[args] = function(*args)
        return results[args]
    return wrapper
59
@cache
def root_dir():
    '''
    Returns the absolute path of the directory that contains this script.
    '''
    this_file = os.path.abspath(__file__)
    return os.path.dirname(this_file)
63
@cache
def windows():
    '''
    Returns true if platform.system() is 'Windows' or starts with 'CYGWIN'.
    '''
    system = platform.system()
    if system == 'Windows':
        return True
    return system.startswith('CYGWIN')
68
@cache
def macos():
    '''
    Returns true if platform.system() is 'Darwin'.
    '''
    return platform.system() == 'Darwin'
73
@cache
def openbsd():
    '''
    Returns true if platform.system() is 'OpenBSD'.
    '''
    return platform.system() == 'OpenBSD'
78
@cache
def msys2():
    '''
    Returns true if platform.system() starts with 'MSYS_NT-'.
    '''
    system = platform.system()
    return system.startswith('MSYS_NT-')
82
@cache
def build_dir():
    '''
    Returns the build directory.

    This is $MUPDF_SETUP_BUILD_DIR if set. Otherwise it is
    <root_dir>/build/shared-release-<cpu>-py<major>.<minor>, where <cpu> is
    'x32' or 'x64' so that we generate 32 or 64-bit binaries to match
    whatever Python we are running under. (This is x86/x64-specific.)
    '''
    override = os.environ.get('MUPDF_SETUP_BUILD_DIR')
    if override is not None:
        return override
    is_32bit = (sys.maxsize == 2**31 - 1)
    cpu = 'x32' if is_32bit else 'x64'
    version_parts = platform.python_version().split('.')
    python_version = '.'.join(version_parts[:2])
    return f'{root_dir()}/build/shared-release-{cpu}-py{python_version}'
96
@cache
def in_sdist():
    '''
    Returns true if a 'PKG-INFO' file exists in root_dir(), which we take to
    mean that we are running from an sdist.
    '''
    pkg_info = f'{root_dir()}/PKG-INFO'
    return os.path.exists(pkg_info)
100
101 sys.path.append(f'{root_dir()}/scripts')
102 import pipcl
103
104
@cache
def mupdf_version():
    '''
    Returns the package version string (computed once, then cached).

    The value is chosen by mupdf_version_internal(), using the first of these
    that applies:

    * $MUPDF_SETUP_VERSION, if set. We assert that it starts with the base
      version defined in include/mupdf/fitz/version.h.

    * The version in 'PKG-INFO', if we are in an sdist. We assert that it
      starts with the base version in include/mupdf/fitz/version.h.

    * Otherwise, the base version in include/mupdf/fitz/version.h with a
      suffix of the form '.YYYYMMDDHHMM' made from the current local date
      and time.
    '''
    version = mupdf_version_internal()
    return version
122
123
def mupdf_version_internal(t_tuple=None):
    '''
    Return version number, with doctest check for broken behaviour with leading
    zeros.

    t_tuple:
        Optional time tuple (as accepted by time.strftime()) used for the
        date/time suffix when we are in a source tree. If None we use
        time.localtime().

    The base version is read from the FZ_VERSION define in
    include/mupdf/fitz/version.h. Assert fails if that define is not found,
    or if a version from $MUPDF_SETUP_VERSION or PKG-INFO does not start with
    the base version.

    >>> t0str = '2024-06-06-00:00'
    >>> t0tuple = time.strptime(t0str, '%Y-%m-%d-%H:%M')
    >>> v = mupdf_version_internal(t0tuple)
    >>> print(v, file=sys.stderr)
    >>> assert v.endswith('.202406060000')
    '''
    with open(f'{root_dir()}/include/mupdf/fitz/version.h') as f:
        text = f.read()
    m = re.search('\n#define FZ_VERSION "([^"]+)"\n', text)
    assert m
    base_version = m.group(1)

    # If MUPDF_SETUP_VERSION exists, use it.
    #
    ret = os.environ.get('MUPDF_SETUP_VERSION')
    if ret:
        log(f'Using version from $MUPDF_SETUP_VERSION: {ret}')
        assert ret.startswith(base_version)
        return ret

    # If we are in an sdist, use the version from the PKG-INFO file.
    #
    if in_sdist():
        # in_sdist() looks for PKG-INFO in root_dir(), so read the same file
        # here instead of depending on the current directory.
        items = pipcl.parse_pkg_info(f'{root_dir()}/PKG-INFO')
        assert items['Name'] == 'mupdf'
        ret = items['Version']
        #log(f'Using version from PKG-INFO: {ret}')
        assert ret.startswith(base_version)
        return ret

    # If we get here, we are in a source tree.
    #
    # We use the MuPDF version with a unique(ish) suffix based on the current
    # date and time, so we can make multiple Python releases without requiring
    # an increment to the MuPDF version.
    #
    # This also allows us to easily experiment on test.pypi.org.
    #
    # We have to avoid the time component(s) containing `.0` as this is
    # prohibited by PEP-440.
    #
    if t_tuple is None:
        t_tuple = time.localtime()
    tt = time.strftime(".%Y%m%d%H%M", t_tuple)
    tail = tt.replace('.0', '.')
    ret = base_version + tail
    #log(f'Have created version number: {ret}')
    pipcl._assert_version_pep_440(ret)
    return ret
178
179
def git_info():
    '''
    Returns (current, origin, diff):
        current: git id from 'git show'.
        origin: git id from 'git show origin'.
        diff: diff relative to current.

    All git commands are run with root_dir() as their working directory and
    their output is decoded as utf8. Raises subprocess.CalledProcessError if
    a git command exits with a non-zero status.
    '''
    def run_git(*args):
        # Pass an argument list and set cwd, rather than building a shell
        # command string containing root_dir(), so that a checkout path with
        # spaces or shell metacharacters cannot break or alter the command.
        output = subprocess.check_output(['git', *args], cwd=root_dir())
        return output.decode('utf8')

    def get_id(*args):
        # With --pretty=oneline the first line of output starts with the
        # commit id followed by a space.
        first_line = run_git(*args).split('\n', 1)[0]
        return first_line.split(' ', 1)[0]

    current = get_id('show', '--pretty=oneline')
    origin = get_id('show', '--pretty=oneline', 'origin')
    diff = run_git('diff')
    return current, origin, diff
197
198
def get_flag(name, default):
    '''
    name:
        Name of environmental variable.
    default:
        Value to return if <name> undefined.
    Returns False if $<name> is '0', True if $<name> is '1', <default> if
    undefined. Otherwise raises AssertionError.

    The chosen value is logged before being returned.
    '''
    value = os.environ.get(name)
    if value is None:
        ret = default
    elif value == '0':
        ret = False
    elif value == '1':
        ret = True
    else:
        # Raise explicitly instead of using `assert 0`: assert statements are
        # removed when Python runs with -O, which would leave <ret> unbound
        # and fail below with a confusing UnboundLocalError.
        raise AssertionError(f'If set, ${name} must be "0" or "1", but is: {value!r}')
    log(f'name={name} default={default} value={value} ret={ret}')
    return ret
219
220
221 # pipcl Callbacks.
222 #
223
def sdist():
    '''
    pipcl callback. We run './scripts/mupdfwrap.py -b ...' to create C++ files
    etc using clang-python (and SWIG-generated source), and return these
    generated files plus all files known to git. [This allows sdists to be
    used to generate wheels etc on machines without clang-python.]

    $MUPDF_SETUP_USE_CLANG_PYTHON and $MUPDF_SETUP_USE_SWIG (both default to
    true here) control which generation steps are run.

    Assert fails if root_dir() is not a git checkout. Raises
    subprocess.CalledProcessError if a git or mupdfwrap.py command fails.
    '''
    assert os.path.exists(f'{root_dir()}/.git'), f'Cannot make sdist because not a git checkout: {root_dir()}'

    # Create 'git-info' file containing git ids that identify this tree. For
    # the moment this is a simple text format, but we could possibly use pickle
    # instead, depending on whether we want to include more information, e.g.
    # diff relative to origin.
    #
    # git_info() decodes git's output as utf8, so we write it back as utf8
    # rather than with the locale's default encoding, which might not be able
    # to represent every character in the diff.
    #
    git_id, git_id_origin, git_diff = git_info()
    with open(f'{root_dir()}/git-info', 'w', encoding='utf8') as f:
        f.write(f'git-id: {git_id}\n')
        f.write(f'git-id-origin: {git_id_origin}\n')
        f.write(f'git-diff:\n{git_diff}\n')

    paths = pipcl.git_items( root_dir(), submodules=True)

    # Strip out some large test directories.
    excluded_prefixes = (
            'thirdparty/harfbuzz/test/',
            'thirdparty/tesseract/test/',
            'thirdparty/extract/test/',
            )
    paths = [path for path in paths if not path.startswith(excluded_prefixes)]

    # Build C++ files and SWIG C code for inclusion in sdist, so that it can be
    # used on systems without clang-python or SWIG.
    #
    use_clang_python = get_flag('MUPDF_SETUP_USE_CLANG_PYTHON', True)
    use_swig = get_flag('MUPDF_SETUP_USE_SWIG', True)
    b = ''
    if use_clang_python:
        b += '0'
    if use_swig:
        b += '2'
    command = '' if os.getcwd() == root_dir() else f'cd {os.path.relpath(root_dir())} && '
    # Quote the interpreter path, as build() does, so that an interpreter
    # installed in a directory containing spaces still works.
    command += f'"{sys.executable}" ./scripts/mupdfwrap.py -d {build_dir()} -b "{b}"'
    log(f'Running: {command}')
    subprocess.check_call(command, shell=True)
    # NOTE(review): 'build/shared-release/mupdf.py' is a fixed path that is
    # not derived from build_dir() - confirm this is where mupdfwrap.py
    # writes mupdf.py.
    paths += [
            'build/shared-release/mupdf.py',
            'git-info',
            'platform/c++/generated.pickle',
            'platform/c++/implementation/classes.cpp',
            'platform/c++/implementation/classes2.cpp',
            'platform/c++/implementation/exceptions.cpp',
            'platform/c++/implementation/functions.cpp',
            'platform/c++/implementation/internal.cpp',
            'platform/c++/include/mupdf/classes.h',
            'platform/c++/include/mupdf/classes2.h',
            'platform/c++/include/mupdf/exceptions.h',
            'platform/c++/include/mupdf/functions.h',
            'platform/c++/include/mupdf/internal.h',
            'platform/c++/windows_mupdf.def',
            'platform/python/mupdfcpp_swig.i.cpp',
            ]
    return paths
295
296
def build():
    '''
    pipcl callback. Builds the MuPDF C, C++ and Python libraries by running
    ./scripts/mupdfwrap.py, and returns the created files as a list of
    (path, '') tuples.

    If $MUPDF_SETUP_DO_BUILD is '0' the build command is skipped (a hack for
    testing) but the list of files is still returned.
    '''
    # If we are an sdist, default to not trying to run clang-python - the
    # generated files will already exist, and installing/using clang-python
    # might be tricky.
    #
    use_clang_python = get_flag('MUPDF_SETUP_USE_CLANG_PYTHON', not in_sdist())
    use_swig = get_flag('MUPDF_SETUP_USE_SWIG', True)

    # Assemble the build-stage characters passed to mupdfwrap.py's -b option.
    stages = []
    if not windows():
        stages.append('m')  # Build C library.
    if use_clang_python:
        stages.append('0')  # Build C++ source.
    stages.append('1')      # Build C++ library (also contains C library on Windows).
    if use_swig:
        stages.append('2')  # Build SWIG-generated source.
    stages.append('3')      # Build SWIG library _mupdf.so.
    b = ''.join(stages)

    # Run from root_dir(), changing directory first if we are elsewhere.
    if root_dir() == os.getcwd():
        prefix = ''
    else:
        prefix = f'cd {os.path.relpath(root_dir())} && '
    command = f'{prefix}"{sys.executable}" ./scripts/mupdfwrap.py -d {build_dir()} -b {b}'

    do_build = os.environ.get('MUPDF_SETUP_DO_BUILD')
    if do_build != '0':
        log(f'build(): Building MuPDF C, C++ and Python libraries with: {command}')
        subprocess.check_call(command, shell=True)
    else:
        # This is a hack for testing.
        log(f'Not doing build because $MUPDF_SETUP_DO_BUILD={do_build}')

    # Return generated files to install or copy into wheel.
    #
    out = build_dir()
    if windows():
        infix = '' if sys.maxsize == 2**31 - 1 else '64'
        names = [
                f'{out}/mupdfcpp{infix}.dll',   # C and C++.
                f'{out}/_mupdf.pyd',            # Python internals.
                f'{out}/mupdf.py',              # Python.
                ]
    elif macos():
        log( f'Contents of {out} are:')
        for leaf in os.listdir(out):
            log( f' {leaf}')
        names = [
                f'{out}/libmupdf.dylib',    # C.
                f'{out}/libmupdfcpp.so',    # C++.
                f'{out}/_mupdf.so',         # Python internals.
                f'{out}/mupdf.py',          # Python.
                ]
    else:
        names = [
                pipcl.get_soname(f'{out}/libmupdf.so'),     # C.
                pipcl.get_soname(f'{out}/libmupdfcpp.so'),  # C++.
                f'{out}/_mupdf.so',                         # Python internals.
                f'{out}/mupdf.py',                          # Python.
                ]
    paths = [(name, '') for name in names]

    log(f'build(): returning: {paths}')
    return paths
366
367
def clean(all_):
    '''
    pipcl callback. Returns what should be removed by a clean.

    all_:
        If true we return a list of the top-level 'build' directory plus the
        Windows build directories in platform/win32/. Otherwise we return
        only build_dir().
    '''
    if not all_:
        # Ideally we would return selected directories in platform/win32/ if on
        # Windows, but that would get a little involved.
        #
        return build_dir()
    win32_dirs = ['Release', 'ReleaseDLL', 'Win32', 'x64']
    return ['build'] + [f'platform/win32/{leaf}' for leaf in win32_dirs]
382
383
384 # Setup pipcl.
385 #
386 description = """
387 Summary
388 -------
389
390 * Python bindings for the MuPDF PDF library.
391 * A python module called ``mupdf``.
392 * Generated from the MuPDF C++ API, which is itself generated from the MuPDF C API.
393 * Provides Python functions that wrap most ``fz_`` and ``pdf_`` functions.
394 * Provides Python classes that wrap most ``fz_`` and ``pdf_`` structs.
395
396 * Class methods provide access to most of the underlying C API functions (except for functions that don't take struct args such as ``fz_strlcpy()``).
397 * MuPDF's ``setjmp``/``longjmp`` exceptions are converted to Python exceptions.
398 * Functions and methods do not take ``fz_context`` arguments. (Automatically-generated per-thread contexts are used internally.)
399 * Wrapper classes automatically handle reference counting of the underlying structs (with internal calls to ``fz_keep_*()`` and ``fz_drop_*()``).
400 * Support for MuPDF function pointers with SWIG Director classes, allowing MuPDF to call Python callbacks.
401 * Provides a small number of extensions beyond the basic C API:
402
403 * Some generated classes have extra support for iteration.
404 * Some custom class methods and constructors.
405 * Simple 'POD' structs have ``__str__()`` methods, for example ``mupdf.Rect`` is represented like: ``(x0=90.51 y0=160.65 x1=501.39 y1=215.6)``.
406
407 Example usage
408 -------------
409
410 Minimal Python code that uses the ``mupdf`` module:
411
412 ::
413
414 import mupdf
415 document = mupdf.Document('foo.pdf')
416
417
418 A simple example Python test script (run by ``scripts/mupdfwrap.py -t``) is:
419
420 * ``scripts/mupdfwrap_test.py``
421
422 More detailed usage of the Python API can be found in:
423
424 * ``scripts/mutool.py``
425 * ``scripts/mutool_draw.py``
426
427 Here is some example code that shows all available information about document's Stext blocks, lines and characters:
428
429 ::
430
431 #!/usr/bin/env python3
432
433 import mupdf
434
435 def show_stext(document):
436 '''
437 Shows all available information about Stext blocks, lines and characters.
438 '''
439 for p in range(document.count_pages()):
440 page = document.load_page(p)
441 stextpage = mupdf.StextPage(page, mupdf.StextOptions())
442 for block in stextpage:
443 block_ = block.m_internal
444 log(f'block: type={block_.type} bbox={block_.bbox}')
445 for line in block:
446 line_ = line.m_internal
447 log(f' line: wmode={line_.wmode}'
448 + f' dir={line_.dir}'
449 + f' bbox={line_.bbox}'
450 )
451 for char in line:
452 char_ = char.m_internal
453 log(f' char: {chr(char_.c)!r} c={char_.c:4} color={char_.color}'
454 + f' origin={char_.origin}'
455 + f' quad={char_.quad}'
456 + f' size={char_.size:6.2f}'
457 + f' font=('
458 + f'is_mono={char_.font.flags.is_mono}'
459 + f' is_bold={char_.font.flags.is_bold}'
460 + f' is_italic={char_.font.flags.is_italic}'
461 + f' ft_substitute={char_.font.flags.ft_substitute}'
462 + f' ft_stretch={char_.font.flags.ft_stretch}'
463 + f' fake_bold={char_.font.flags.fake_bold}'
464 + f' fake_italic={char_.font.flags.fake_italic}'
465 + f' has_opentype={char_.font.flags.has_opentype}'
466 + f' invalid_bbox={char_.font.flags.invalid_bbox}'
467 + f' name={char_.font.name}'
468 + f')'
469 )
470
471 document = mupdf.Document('foo.pdf')
472 show_stext(document)
473
474 More information
475 ----------------
476
477 https://mupdf.com/r/C-and-Python-APIs
478
479 """
480
# The full text of the COPYING file is used as the package's license field.
with open(f'{root_dir()}/COPYING') as f:
    license = f.read()

# The pipcl package object; the build, clean and sdist callbacks defined in
# this file do the real work.
mupdf_package = pipcl.Package(
        name='mupdf',
        version=mupdf_version(),
        root=root_dir(),
        summary='Python bindings for MuPDF library.',
        description=description,
        classifier=[
            'Development Status :: 4 - Beta',
            'Intended Audience :: Developers',
            'License :: OSI Approved :: GNU Affero General Public License v3',
            'Programming Language :: Python :: 3',
        ],
        author='Artifex Software, Inc.',
        author_email='support@artifex.com',
        home_page='https://mupdf.com/',
        # Each entry is a single '<label>, <url>' string.
        project_url=[
            'Documentation, https://mupdf.com/r/C-and-Python-APIs/',
            'Source, https://git.ghostscript.com/?p=mupdf.git',
            'Tracker, https://bugs.ghostscript.com/',
        ],
        keywords='PDF',
        platform=None,
        license=license,
        fn_build=build,
        fn_clean=clean,
        fn_sdist=sdist,
        )
511
512
# Things to allow us to function as a PEP-517 backend:
514 #
def build_wheel( wheel_directory, config_settings=None, metadata_directory=None):
    '''
    Build-backend hook; passes its arguments straight through to
    mupdf_package.build_wheel() and returns the result.
    '''
    result = mupdf_package.build_wheel(wheel_directory, config_settings, metadata_directory)
    return result
521
def build_sdist( sdist_directory, config_settings=None):
    '''
    Build-backend hook; passes its arguments straight through to
    mupdf_package.build_sdist() and returns the result.
    '''
    result = mupdf_package.build_sdist(sdist_directory, config_settings)
    return result
527
def get_requires_for_build_wheel(config_settings=None):
    '''
    Returns the list of packages needed to build a wheel. This adds to
    pyproject.toml:[build-system]:requires, allowing programmatic control
    over what packages we require.

    We always require 'setuptools'. 'libclang' and 'swig' are added unless
    the platform is one where we assume they are installed by other means.
    '''
    requirements = ['setuptools']

    # libclang.
    #
    # OpenBSD: libclang not available via pip; assuming `pkg_add py3-llvm`.
    if not openbsd():
        if macos() and platform.machine() == 'arm64':
            # MacOS/arm64: forcing use of libclang 16.0.6 because 17.0.6
            # and 18.1.1 are known to fail with:
            # `clang.cindex.TranslationUnitLoadError: Error parsing translation unit.`
            requirements.append('libclang==16.0.6')
        else:
            requirements.append('libclang')

    # swig.
    #
    # msys2: pip install of swig does not build; assuming `pacman -S swig`.
    # OpenBSD: pip install of swig does not build; assuming `pkg_add swig`.
    if not (msys2() or openbsd()):
        requirements.append('swig')

    return requirements
556
557
# Allow us to be used as a pre-PEP-517 setup.py script.
#
if __name__ == '__main__':
    # Hand the full command line to the pipcl package object.
    mupdf_package.handle_argv(sys.argv)