comparison mupdf-source/scripts/mupdfwrap_test.py @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 #!/usr/bin/env python3
2
3 '''
4 Simple tests of the Python MuPDF API.
5 '''
6
7 import inspect
8 import os
9 import platform
10 import sys
11
# Select the MuPDF Python binding named by $MUPDF_PYTHON and bind it to the
# module-level name `mupdf`, which the rest of this file uses.
if os.environ.get('MUPDF_PYTHON') in ('swig', None):
    # Unset or 'swig': use the SWIG-generated module.
    #
    # PYTHONPATH should have been set up to point to a build/shared-*/
    # directory containing mupdf.so generated by scripts/mupdfwrap.py and SWIG.
    import mupdf
elif os.environ.get('MUPDF_PYTHON') == 'cppyy':
    # Temporarily put ../../platform/python (relative to this file) at the
    # front of sys.path so that mupdf_cppyy is imported from there, then remove
    # that entry again.
    sys.path.insert(0, os.path.abspath(f'{__file__}/../../platform/python'))
    import mupdf_cppyy
    del sys.path[0]
    mupdf = mupdf_cppyy.cppyy.gbl.mupdf
else:
    # Any other value is treated as a configuration error.
    raise Exception(f'Unrecognised $MUPDF_PYTHON: {os.environ.get("MUPDF_PYTHON")}')
23
24
# Text that log() inserts between the caller's location and the message.
_log_prefix = ''


def log(text):
    '''
    Writes <text> to stderr as a single line, preceded by the caller's
    filename:lineno and the current log prefix, then flushes stderr.
    '''
    caller = inspect.stack()[1]
    location = f'{caller.filename}:{caller.lineno}'
    sys.stderr.write(f'{location} {_log_prefix}{text}\n')
    sys.stderr.flush()


def log_prefix_set(prefix):
    '''
    Sets the prefix that subsequent log() calls put in front of their text.
    '''
    global _log_prefix
    _log_prefix = prefix
35
# Number of times test() has been called; test() includes it in the names of
# the output files it writes.
g_test_n = 0

# Absolute path of the parent of the directory that contains this file.
g_mupdf_root = os.path.abspath('%s/../..' % __file__)
39
40
def show_stext(document):
    '''
    Shows all available information about Stext blocks, lines and characters.

    document:
        A mupdf.FzDocument. For every page, each stext block, line and
        character is written out with log().

    Uses the same Fz-prefixed wrapper API as the rest of this file
    (fz_count_pages(), FzStextOptions(0), FzStextPage(document, n, options)).
    '''
    options = mupdf.FzStextOptions(0)
    for p in range(document.fz_count_pages()):
        stextpage = mupdf.FzStextPage(document, p, options)
        for block in stextpage:
            # m_internal is the underlying C struct of each wrapper object.
            block_ = block.m_internal
            log(f'block: type={block_.type} bbox={block_.bbox}')
            for line in block:
                line_ = line.m_internal
                log(f' line: wmode={line_.wmode}'
                        + f' dir={line_.dir}'
                        + f' bbox={line_.bbox}'
                        )
                for char in line:
                    char_ = char.m_internal
                    flags = char_.font.flags
                    log(f' char: {chr(char_.c)!r} c={char_.c:4} color={char_.color}'
                            + f' origin={char_.origin}'
                            + f' quad={char_.quad}'
                            + f' size={char_.size:6.2f}'
                            + f' font=('
                                + f'is_mono={flags.is_mono}'
                                + f' is_bold={flags.is_bold}'
                                + f' is_italic={flags.is_italic}'
                                + f' ft_substitute={flags.ft_substitute}'
                                + f' ft_stretch={flags.ft_stretch}'
                                + f' fake_bold={flags.fake_bold}'
                                + f' fake_italic={flags.fake_italic}'
                                + f' has_opentype={flags.has_opentype}'
                                + f' invalid_bbox={flags.invalid_bbox}'
                                + f' name={char_.font.name}'
                                + f')'
                            )
76
77
def test_filter(path):
    '''
    Tests Python subclassing of the pdf filter option classes.

    Runs pdf_filter_page_contents() on every page of the PDF at <path> using
    Python-implemented callbacks, then repeats on page 0 with a deliberately
    broken callback and checks the text of any resulting exception. Finally
    saves the document as 'mupdf_test-out0.pdf'.

    Does nothing on Windows.
    '''
    if platform.system() == 'Windows':
        print( 'Not testing mupdf.PdfFilterOptions2 because known to fail on Windows.')
        return

    # pdf_sanitizer_filter_options.
    class MySanitizeFilterOptions( mupdf.PdfSanitizeFilterOptions2):
        def __init__( self):
            super().__init__()
            # NOTE(review): presumably makes the C side dispatch to the
            # text_filter() override below - confirm against the bindings.
            self.use_virtual_text_filter()
            # Flipped between 0 and 1 by each text_filter() call.
            self.state = 1
        def text_filter( self, ctx, ucsbuf, ucslen, trm, ctm, bbox):
            if 0:
                log( f'text_filter(): ctx={ctx} ucsbuf={ucsbuf} ucslen={ucslen} trm={trm} ctm={ctm} bbox={bbox}')
            # Remove every other item.
            self.state = 1 - self.state
            return self.state
    sanitize_filter_options = MySanitizeFilterOptions()

    # pdf_filter_factory.
    class MyPdfFilterFactory( mupdf.PdfFilterFactory2):
        def __init__( self, sopts):
            super().__init__()
            # Sanitize options handed to every filter we create.
            self.sopts = sopts
            self.use_virtual_filter()
        def filter(self, ctx, doc, chain, struct_parents, transform, options):
            return mupdf.ll_pdf_new_sanitize_filter( doc, chain, struct_parents, transform, options, self.sopts)
        # Same as filter() but with an extra required argument, so calling it
        # with filter()'s arguments raises a TypeError.
        def filter_bad(self, ctx, doc, chain, struct_parents, transform, options, extra_arg):
            return mupdf.ll_pdf_new_sanitize_filter( doc, chain, struct_parents, transform, options, self.sopts)
    filter_factory = MyPdfFilterFactory( sanitize_filter_options.internal())

    # pdf_filter_options.
    class MyFilterOptions( mupdf.PdfFilterOptions2):
        def __init__( self):
            super().__init__()
            self.recurse = 1
            self.instance_forms = 0
            self.ascii = 1
    filter_options = MyFilterOptions()

    filter_options.add_factory( filter_factory.internal())

    # Filter every page, each inside its own begin/end operation pair.
    document = mupdf.PdfDocument(path)
    for p in range(document.pdf_count_pages()):
        page = document.pdf_load_page(p)
        log( f'Running document.pdf_filter_page_contents on page {p}')
        document.pdf_begin_operation('test filter')
        document.pdf_filter_page_contents(page, filter_options)
        document.pdf_end_operation()

    if 1:
        # Try again but with a broken filter_factory callback method, and check
        # we get an appropriate exception. This checks that the SWIG Director
        # exception-handling code is working.
        #
        # NOTE(review): if no exception is raised at all, this block passes
        # silently - confirm whether that is intended.
        #
        filter_factory.filter = filter_factory.filter_bad
        page = document.pdf_load_page(0)
        document.pdf_begin_operation('test filter')
        try:
            document.pdf_filter_page_contents(page, filter_options)
        except Exception as e:
            e_expected_text = "filter_bad() missing 1 required positional argument: 'extra_arg'"
            if e_expected_text not in str(e):
                raise Exception(f'Error does not contain expected text: {e_expected_text}') from e
        finally:
            # Always close the operation opened above, even on failure.
            document.pdf_end_operation()

    if 1:
        document.pdf_save_document('mupdf_test-out0.pdf', mupdf.PdfWriteOptions())
147
148
def test_install_load_system_font(path):
    '''
    Very basic test of mupdf.fz_install_load_system_font_funcs(). We check
    that the font returned by our python callback is returned if we ask for a
    non-existent font.

    We also render `path` as a PNG with/without our font override. This isn't
    particularly useful, but if `path` contained references to unknown fonts,
    it would give different results.

    path:
        Path of a document mupdf can open. The PNGs are written next to it as
        `<path>.png` and `<path>-replace-font.png`.

    The font-loading callbacks are process-wide state, so they are always
    reset to the default before returning, including when a check fails.
    '''
    print('test_install_load_system_font()')

    def make_png(infix=''):
        # Renders page 0 of `path` as RGB and saves it as <path><infix>.png.
        document = mupdf.FzDocument(path)
        pixmap = mupdf.FzPixmap(document, 0, mupdf.FzMatrix(), mupdf.FzColorspace(mupdf.FzColorspace.Fixed_RGB), 0)
        path_out = f'{path}{infix}.png'
        pixmap.fz_save_pixmap_as_png(path_out)
        print(f'Have created: {path_out}.')

    make_png()

    # Records the arguments of every callback invocation, in call order.
    trace = []
    replacement_font = mupdf.fz_new_font_from_file(
            None,
            os.path.abspath(f'{__file__}/../../resources/fonts/urw/NimbusRoman-BoldItalic.cff'),
            0,
            0,
            )
    assert replacement_font.m_internal
    print(f'{replacement_font.m_internal.name=} {replacement_font.m_internal.glyph_count=}')

    def font_f(name, bold, italic, needs_exact_metrics):
        trace.append((name, bold, italic, needs_exact_metrics))
        print(f'font_f(): Looking for font: {name=} {bold=} {italic=} {needs_exact_metrics=}.')
        # Always return `replacement_font`.
        return replacement_font
    def f_cjk(name, ordering, serif):
        trace.append((name, ordering, serif))
        print(f'f_cjk(): Looking for font: {name=} {ordering=} {serif=}.')
        return None
    def f_fallback(script, language, serif, bold, italic):
        trace.append((script, language, serif, bold, italic))
        print(f'f_fallback(): looking for font: {script=} {language=} {serif=} {bold=} {italic=}.')
        return None

    mupdf.fz_install_load_system_font_funcs(font_f, f_cjk, f_fallback)
    try:
        # Check that asking for any font returns `replacement_font`.
        font = mupdf.fz_load_system_font("some-font-name", 0, 0, 0)
        assert isinstance(font, mupdf.FzFont)
        assert trace == [
                ('some-font-name', 0, 0, 0),
                ], f'Incorrect {trace=}.'
        assert font.m_internal
        print(f'{font.m_internal.name=} {font.m_internal.glyph_count=}')
        assert font.m_internal.name == replacement_font.m_internal.name
        assert font.m_internal.glyph_count == replacement_font.m_internal.glyph_count

        make_png('-replace-font')
    finally:
        # Restore default behaviour, even if a check above failed, so our
        # callbacks do not stay installed for later code in this process.
        mupdf.fz_install_load_system_font_funcs()

    # With the defaults restored, the made-up font name must not resolve.
    font = mupdf.fz_load_system_font("some-font-name", 0, 0, 0)
    assert not font.m_internal
212
213
def test(path):
    '''
    Runs various mupdf operations on <path>, which is assumed to be a file that
    mupdf can open.

    In order: the system-font callback test, exception translation, the filter
    (SWIG Director) test, the function and class APIs, metadata lookup, pixmap
    rendering to PNG/PPM, links, outline iteration, stext iteration, searching,
    the FzDocument copy-constructor, FzDocumentWriter and out-param handling.

    Output files named `mupdf_test-out*` are written with relative names, so
    they end up in the current directory. Failures are reported by raising
    (mostly AssertionError).
    '''
    global g_test_n

    log(f'testing path={path}')

    assert os.path.isfile(path)
    g_test_n += 1

    test_install_load_system_font(path)

    # See notes in wrap/swig.py:build_swig() about buffer_extract() and
    # buffer_storage().
    #
    assert getattr(mupdf.FzBuffer, 'fz_buffer_storage_raw', None) is None
    assert getattr(mupdf.FzBuffer, 'fz_buffer_storage')
    assert getattr(mupdf.FzBuffer, 'fz_buffer_extract')
    assert getattr(mupdf.FzBuffer, 'fz_buffer_extract_copy')

    # Test that we get the expected Python exception instance and text.
    document = mupdf.FzDocument(path)
    try:
        mupdf.fz_load_page(document, 99999999)
    except mupdf.FzErrorArgument as e:
        log(f'{type(e)=} {str(e)=} {repr(e)=}.')
        log(f'{e.what()=}.')
        expected = 'code=4: invalid page number: 100000000'
        assert str(e) == expected and e.what() == expected, (
                f'Incorrect exception text:\n'
                f' {str(e)=}\n'
                f' {e.what()=}\n'
                f' {expected=}'
                )
    except Exception as e:
        assert 0, f'Incorrect exception {type(e)=} {e=}.'
    else:
        assert 0, 'No expected exception.'

    # Test SWIG Director wrapping of pdf_filter_options:
    #
    test_filter(path)

    # Test operations using functions:
    #
    log('Testing functions.')
    log(f' Opening: {path}')
    document = mupdf.fz_open_document(path)
    log(f' mupdf.fz_needs_password(document)={mupdf.fz_needs_password(document)}')
    log(f' mupdf.fz_count_pages(document)={mupdf.fz_count_pages(document)}')
    log(f' mupdf.fz_document_output_intent(document)={mupdf.fz_document_output_intent(document)}')

    # Test operations using classes:
    #
    log('Testing classes')

    document = mupdf.FzDocument(path)
    log(f'Have created mupdf.FzDocument for {path}')
    log(f'document.fz_needs_password()={document.fz_needs_password()}')
    log(f'document.fz_count_pages()={document.fz_count_pages()}')

    if 0:
        log('stext info:')
        show_stext(document)

    # 'qwerty' is a made-up key; looking it up must give None.
    for k in (
            'format',
            'encryption',
            'info:Author',
            'info:Title',
            'info:Creator',
            'info:Producer',
            'qwerty',
            ):
        v = document.fz_lookup_metadata(k)
        log(f'document.fz_lookup_metadata() k={k} returned v={v!r}')
        if k == 'qwerty':
            assert v is None, f'v={v!r}'

    zoom = 10
    scale = mupdf.FzMatrix.fz_scale(zoom/100., zoom/100.)
    page_number = 0
    log(f'Have created scale: a={scale.a} b={scale.b} c={scale.c} d={scale.d} e={scale.e} f={scale.f}')

    colorspace = mupdf.FzColorspace(mupdf.FzColorspace.Fixed_RGB)
    log(f'colorspace.m_internal.key_storable.storable.refs={colorspace.m_internal.key_storable.storable.refs!r}')
    if 0:
        c = colorspace.fz_clamp_color([3.14])
        log(f'colorspace.clamp_color returned c={c}')
    pixmap = mupdf.FzPixmap(document, page_number, scale, colorspace, 0)
    log(f'Have created pixmap: {pixmap.m_internal.w} {pixmap.m_internal.h} {pixmap.m_internal.stride} {pixmap.m_internal.n}')

    filename = f'mupdf_test-out1-{g_test_n}.png'
    pixmap.fz_save_pixmap_as_png(filename)
    log(f'Have created {filename} using pixmap.save_pixmap_as_png().')

    # Print image data in ascii PPM format. Copied from
    # mupdf/docs/examples/example.c.
    #
    samples = pixmap.samples()
    stride = pixmap.stride()
    n = pixmap.n()
    width = pixmap.m_internal.w
    height = pixmap.m_internal.h
    # How a single sample is read depends on the binding in use; decide once
    # rather than for every pixel.
    if hasattr(mupdf, 'bytes_getitem'):
        # swig
        def sample(i):
            return mupdf.bytes_getitem(samples, i)
    else:
        # cppyy
        def sample(i):
            return samples[i]
    filename = f'mupdf_test-out2-{g_test_n}.ppm'
    with open(filename, 'w') as f:
        f.write('P3\n')
        f.write('%s %s\n' % (width, height))
        f.write('255\n')
        for y in range(height):
            for x in range(width):
                if x:
                    f.write(' ')
                offset = y * stride + x * n
                f.write('%3d %3d %3d' % (
                        sample(offset + 0),
                        sample(offset + 1),
                        sample(offset + 2),
                        ))
            f.write('\n')
    log(f'Have created {filename} by scanning pixmap.')

    # Generate .png again, but create the Pixmap from a Page instead of from
    # the Document.
    #
    page = mupdf.FzPage(document, 0)
    separations = page.fz_page_separations()
    log(f'page_separations() returned {"true" if separations else "false"}')
    pixmap = mupdf.FzPixmap(page, scale, colorspace, 0)
    filename = f'mupdf_test-out3-{g_test_n}.png'
    pixmap.fz_save_pixmap_as_png(filename)
    log(f'Have created {filename} using pixmap.fz_save_pixmap_as_png()')

    # Show links
    log('Links.')
    page = mupdf.FzPage(document, 0)
    link = mupdf.fz_load_links(page)
    log(f'{link}')
    if link:
        for i in link:
            log(f'{i}')

    # Check we can iterate over Link's, by creating one manually.
    #
    link = mupdf.FzLink(mupdf.FzRect(0, 0, 1, 1), "hello")
    log('items in <link> are:')
    for i in link:
        log(f' {i.m_internal.refs} {i.m_internal.uri}')

    # Check iteration over Outlines. We do depth-first iteration.
    #
    log('Outlines.')
    def olog(text):
        # Verbose outline tracing; change to `if 1` to enable.
        if 0:
            log(text)
    num_outline_items = 0
    depth = 0
    it = mupdf.FzOutlineIterator(document)
    while 1:
        item = it.fz_outline_iterator_item()
        olog(f'depth={depth} valid={item.valid()}')
        if item.valid():
            log(f'{" "*depth*4}uri={item.uri()} is_open={item.is_open()} title={item.title()}')
            num_outline_items += 1
        else:
            olog(f'{" "*depth*4}<null>')
        r = it.fz_outline_iterator_down()
        olog(f'depth={depth} down => {r}')
        if r >= 0:
            depth += 1
        if r < 0:
            r = it.fz_outline_iterator_next()
            olog(f'depth={depth} next => {r}')
            assert r
            if r:
                # No more items at current depth, so repeatedly go up until we
                # can go right.
                end = 0
                while 1:
                    r = it.fz_outline_iterator_up()
                    olog(f'depth={depth} up => {r}')
                    if r < 0:
                        # We are at EOF. Need to break out of top-level loop.
                        end = 1
                        break
                    depth -= 1
                    r = it.fz_outline_iterator_next()
                    olog(f'depth={depth} next => {r}')
                    if r == 0:
                        # There are items at this level.
                        break
                if end:
                    break
    log(f'num_outline_items={num_outline_items}')

    # Check iteration over StextPage.
    #
    log('StextPage.')
    stext_options = mupdf.FzStextOptions(0)
    page_num = 40
    try:
        stext_page = mupdf.FzStextPage(document, page_num, stext_options)
    except Exception:
        # Not an error: the document simply has no such page.
        log(f'no page_num={page_num}')
    else:
        device_stext = mupdf.FzDevice(stext_page, stext_options)
        matrix = mupdf.FzMatrix()
        page = mupdf.FzPage(document, 0)
        cookie = mupdf.FzCookie()
        page.fz_run_page(device_stext, matrix, cookie)
        log(' stext_page is:')
        for block in stext_page:
            log(' block:')
            for line in block:
                line_text = ''.join(chr(char.m_internal.c) for char in line)
                log(f' {line_text}')

        device_stext.fz_close_device()

    # Check fz_search_page2().
    items = mupdf.fz_search_page2(document, 0, "compression", 20)
    print(f'{len(items)=}')
    for item in items:
        print(f' {item.mark=} {item.quad=}')

    # Check copy-constructor. The original wrapper is deleted so that only
    # the copy is used from here on.
    log('Checking copy-constructor')
    document2 = mupdf.FzDocument(document)
    del document
    page = mupdf.FzPage(document2, 0)
    scale = mupdf.FzMatrix()
    pixmap = mupdf.FzPixmap(page, scale, colorspace, 0)
    pixmap.fz_save_pixmap_as_png('mupdf_test-out3.png')

    stdout = mupdf.FzOutput(mupdf.FzOutput.Fixed_STDOUT)
    log(f'{type(stdout)} {stdout.m_internal.state}')

    # Note that `filename` here is still the mupdf_test-out3-<n>.png name set
    # when the page pixmap was saved above.
    mediabox = page.fz_bound_page()
    out = mupdf.FzDocumentWriter(filename, 'png', '', mupdf.FzDocumentWriter.FormatPathType_DOCUMENT)
    dev = out.fz_begin_page(mediabox)
    page.fz_run_page(dev, mupdf.FzMatrix(mupdf.fz_identity), mupdf.FzCookie())
    out.fz_end_page()

    # Check out-params are converted into python return value.
    bitmap = mupdf.FzBitmap(10, 20, 8, 72, 72)
    bitmap_details = bitmap.fz_bitmap_details()
    log(f'{bitmap_details}')
    assert list(bitmap_details) == [10, 20, 8, 12], f'bitmap_details={bitmap_details!r}'

    log(f'finished test of {path}')
475
476
if __name__ == '__main__':

    print(f'{mupdf.Py_LIMITED_API=}', flush=1)

    # Test the files named on the command line; if none are given, fall back
    # to thirdparty/zlib/zlib.3.pdf under the mupdf root.
    paths = sys.argv[1:] or [f'{g_mupdf_root}/thirdparty/zlib/zlib.3.pdf']

    for path in paths:
        # Tag every log line with the file under test (relative to the mupdf
        # root), and always clear the tag again afterwards.
        log_prefix_set(f'{os.path.relpath(path, g_mupdf_root)}: ')
        try:
            test(path)
        finally:
            log_prefix_set('')

    log('finished')