Mercurial > hgrepos > Python2 > PyMuPDF

diff mupdf-source/scripts/mupdfwrap_test.py @ 3:2c135c81b16c
MERGE: upstream PyMuPDF 1.26.4 with MuPDF 1.26.7
author: Franz Glasner <fzglas.hg@dom66.de>
date: Mon, 15 Sep 2025 11:44:09 +0200
parents: b50eed0cc0ef
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mupdf-source/scripts/mupdfwrap_test.py	Mon Sep 15 11:44:09 2025 +0200
@@ -0,0 +1,495 @@
+#!/usr/bin/env python3
+
+'''
+Simple tests of the Python MuPDF API.
+'''
+
+import inspect
+import os
+import platform
+import sys
+
+if os.environ.get('MUPDF_PYTHON') in ('swig', None):
+    # PYTHONPATH should have been set up to point to a build/shared-*/
+    # directory containing mupdf.so generated by scripts/mupdfwrap.py and SWIG.
+    import mupdf
+elif os.environ.get('MUPDF_PYTHON') == 'cppyy':
+    sys.path.insert(0, os.path.abspath(f'{__file__}/../../platform/python'))
+    import mupdf_cppyy
+    del sys.path[0]
+    mupdf = mupdf_cppyy.cppyy.gbl.mupdf
+else:
+    raise Exception(f'Unrecognised $MUPDF_PYTHON: {os.environ.get("MUPDF_PYTHON")}')
+
+
+_log_prefix = ''
+
+def log(text):
+    f = inspect.stack()[1]
+    print(f'{f.filename}:{f.lineno} {_log_prefix}{text}', file=sys.stderr)
+    sys.stderr.flush()
+
+def log_prefix_set(prefix):
+    '''
+    Sets the text that log() inserts in front of every later message. Pass
+    an empty string to remove the prefix again.
+    '''
+    global _log_prefix
+    _log_prefix = prefix
+
+g_test_n = 0
+
+g_mupdf_root = os.path.abspath('%s/../..' % __file__)
+
+
+def show_stext(document):
+    '''
+    Shows all available information about Stext blocks, lines and characters.
+    '''
+    for p in range(document.count_pages()):
+        page = document.load_page(p)
+        stextpage = mupdf.StextPage(page, mupdf.StextOptions())
+        for block in stextpage:
+            block_ = block.m_internal
+            log(f'block: type={block_.type} bbox={block_.bbox}')
+            for line in block:
+                line_ = line.m_internal
+                log(f'    line: wmode={line_.wmode}'
+                        + f' dir={line_.dir}'
+                        + f' bbox={line_.bbox}'
+                        )
+                for char in line:
+                    char_ = char.m_internal
+                    log(f'        char: {chr(char_.c)!r} c={char_.c:4} color={char_.color}'
+                            + f' origin={char_.origin}'
+                            + f' quad={char_.quad}'
+                            + f' size={char_.size:6.2f}'
+                            + f' font=('
+                                +  f'is_mono={char_.font.flags.is_mono}'
+                                + f' is_bold={char_.font.flags.is_bold}'
+                                + f' is_italic={char_.font.flags.is_italic}'
+                                + f' ft_substitute={char_.font.flags.ft_substitute}'
+                                + f' ft_stretch={char_.font.flags.ft_stretch}'
+                                + f' fake_bold={char_.font.flags.fake_bold}'
+                                + f' fake_italic={char_.font.flags.fake_italic}'
+                                + f' has_opentype={char_.font.flags.has_opentype}'
+                                + f' invalid_bbox={char_.font.flags.invalid_bbox}'
+                                + f' name={char_.font.name}'
+                                + f')'
+                            )
+
+
+def test_filter(path):
+    if platform.system() == 'Windows':
+        print( 'Not testing mupdf.PdfFilterOptions2 because known to fail on Windows.')
+        return
+
+    # pdf_sanitizer_filter_options.
+    class MySanitizeFilterOptions( mupdf.PdfSanitizeFilterOptions2):
+        def __init__( self):
+            super().__init__()
+            self.use_virtual_text_filter()
+            self.state = 1
+        def text_filter( self, ctx, ucsbuf, ucslen, trm, ctm, bbox):
+            if 0:
+                log( f'text_filter(): ctx={ctx} ucsbuf={ucsbuf} ucslen={ucslen} trm={trm} ctm={ctm} bbox={bbox}')
+            # Remove every other item.
+            self.state = 1 - self.state
+            return self.state
+    sanitize_filter_options = MySanitizeFilterOptions()
+
+    # pdf_filter_factory.
+    class MyPdfFilterFactory( mupdf.PdfFilterFactory2):
+        def __init__( self, sopts):
+            super().__init__()
+            self.sopts = sopts
+            self.use_virtual_filter()
+        def filter(self, ctx, doc, chain, struct_parents, transform, options):
+            return mupdf.ll_pdf_new_sanitize_filter( doc, chain, struct_parents, transform, options, self.sopts)
+        def filter_bad(self, ctx, doc, chain, struct_parents, transform, options, extra_arg):
+            return mupdf.ll_pdf_new_sanitize_filter( doc, chain, struct_parents, transform, options, self.sopts)
+    filter_factory = MyPdfFilterFactory( sanitize_filter_options.internal())
+
+    # pdf_filter_options.
+    class MyFilterOptions( mupdf.PdfFilterOptions2):
+        def __init__( self):
+            super().__init__()
+            self.recurse = 1
+            self.instance_forms = 0
+            self.ascii = 1
+    filter_options = MyFilterOptions()
+
+    filter_options.add_factory( filter_factory.internal())
+
+    document = mupdf.PdfDocument(path)
+    for p in range(document.pdf_count_pages()):
+        page = document.pdf_load_page(p)
+        log( f'Running document.pdf_filter_page_contents on page {p}')
+        document.pdf_begin_operation('test filter')
+        document.pdf_filter_page_contents(page, filter_options)
+        document.pdf_end_operation()
+
+    if 1:
+        # Try again but with a broken filter_factory callback method, and check
+        # we get an appropriate exception. This checks that the SWIG Director
+        # exception-handling code is working.
+        #
+        filter_factory.filter = filter_factory.filter_bad
+        page = document.pdf_load_page(0)
+        document.pdf_begin_operation('test filter')
+        try:
+            document.pdf_filter_page_contents(page, filter_options)
+        except Exception as e:
+            e_expected_text = "filter_bad() missing 1 required positional argument: 'extra_arg'"
+            if e_expected_text not in str(e):
+                raise Exception(f'Error does not contain expected text: {e_expected_text}') from e
+        finally:
+            document.pdf_end_operation()
+
+    if 1:
+        document.pdf_save_document('mupdf_test-out0.pdf', mupdf.PdfWriteOptions())
+
+
+def test_install_load_system_font(path):
+    '''
+    Very basic test of mupdf.fz_install_load_system_font_funcs(). We check
+    that the fonts returned by our python callback is returned if we ask for a
+    non-existent font.
+
+    We also render `path` as a PNG with/without our font override. This isn't
+    particularly useful, but if `path` contained references to unknown fonts,
+    it would give different results.
+    '''
+    print(f'test_install_load_system_font()')
+
+    def make_png(infix=''):
+        document = mupdf.FzDocument(path)
+        pixmap = mupdf.FzPixmap(document, 0, mupdf.FzMatrix(), mupdf.FzColorspace(mupdf.FzColorspace.Fixed_RGB), 0)
+        path_out = f'{path}{infix}.png'
+        pixmap.fz_save_pixmap_as_png(path_out)
+        print(f'Have created: {path_out}.')
+
+    make_png()
+
+    trace = list()
+    replacement_font = mupdf.fz_new_font_from_file(
+            None,
+            os.path.abspath(f'{__file__}/../../resources/fonts/urw/NimbusRoman-BoldItalic.cff'),
+            0,
+            0,
+            )
+    assert replacement_font.m_internal
+    print(f'{replacement_font.m_internal.name=} {replacement_font.m_internal.glyph_count=}')
+
+    def font_f(name, bold, italic, needs_exact_metrics):
+        trace.append((name, bold, italic, needs_exact_metrics))
+        print(f'font_f(): Looking for font: {name=} {bold=} {italic=} {needs_exact_metrics=}.')
+        # Always return `replacement_font`.
+        return replacement_font
+    def f_cjk(name, ordering, serif):
+        trace.append((name, ordering, serif))
+        print(f'f_cjk(): Looking for font: {name=} {ordering=} {serif=}.')
+        return None
+    def f_fallback(script, language, serif, bold, italic):
+        trace.append((script, language, serif, bold, italic))
+        print(f'f_fallback(): looking for font: {script=} {language=} {serif=} {bold=} {italic=}.')
+        return None
+    mupdf.fz_install_load_system_font_funcs(font_f, f_cjk, f_fallback)
+
+    # Check that asking for any font returns `replacement_font`.
+    font = mupdf.fz_load_system_font("some-font-name", 0, 0, 0)
+    assert isinstance(font, mupdf.FzFont)
+    assert trace == [
+            ('some-font-name', 0, 0, 0),
+            ], f'Incorrect {trace=}.'
+    assert font.m_internal
+    print(f'{font.m_internal.name=} {font.m_internal.glyph_count=}')
+    assert font.m_internal.name == replacement_font.m_internal.name
+    assert font.m_internal.glyph_count == replacement_font.m_internal.glyph_count
+
+    make_png('-replace-font')
+
+    # Restore default behaviour.
+    mupdf.fz_install_load_system_font_funcs()
+    font = mupdf.fz_load_system_font("some-font-name", 0, 0, 0)
+    assert not font.m_internal
+
+
+def test(path):
+    '''
+    Runs various mupdf operations on <path>, which is assumed to be a file that
+    mupdf can open.
+    '''
+    log(f'testing path={path}')
+
+    assert os.path.isfile(path)
+    global g_test_n
+    g_test_n += 1
+
+    test_install_load_system_font(path)
+
+    # See notes in wrap/swig.py:build_swig() about buffer_extract() and
+    # buffer_storage().
+    #
+    assert getattr(mupdf.FzBuffer, 'fz_buffer_storage_raw', None) is None
+    assert getattr(mupdf.FzBuffer, 'fz_buffer_storage')
+    assert getattr(mupdf.FzBuffer, 'fz_buffer_extract')
+    assert getattr(mupdf.FzBuffer, 'fz_buffer_extract_copy')
+
+    # Test that we get the expected Python exception instance and text.
+    document = mupdf.FzDocument(path)
+    try:
+        mupdf.fz_load_page(document, 99999999)
+    except mupdf.FzErrorArgument as e:
+        log(f'{type(e)=} {str(e)=} {repr(e)=}.')
+        log(f'{e.what()=}.')
+        expected = 'code=4: invalid page number: 100000000'
+        assert str(e) == expected and e.what() == expected, (
+                f'Incorrect exception text:\n'
+                f'    {str(e)=}\n'
+                f'    {e.what()=}\n'
+                f'    {expected=}'
+                )
+    except Exception as e:
+        assert 0, f'Incorrect exception {type(e)=} {e=}.'
+    else:
+        assert 0, f'No expected exception.'
+
+    # Test SWIG Director wrapping of pdf_filter_options:
+    #
+    test_filter(path)
+
+    # Test operations using functions:
+    #
+    log('Testing functions.')
+    log(f'    Opening: %s' % path)
+    document = mupdf.fz_open_document(path)
+    log(f'    mupdf.fz_needs_password(document)={mupdf.fz_needs_password(document)}')
+    log(f'    mupdf.fz_count_pages(document)={mupdf.fz_count_pages(document)}')
+    log(f'    mupdf.fz_document_output_intent(document)={mupdf.fz_document_output_intent(document)}')
+
+    # Test operations using classes:
+    #
+    log(f'Testing classes')
+
+    document = mupdf.FzDocument(path)
+    log(f'Have created mupdf.FzDocument for {path}')
+    log(f'document.fz_needs_password()={document.fz_needs_password()}')
+    log(f'document.fz_count_pages()={document.fz_count_pages()}')
+
+    if 0:
+        log(f'stext info:')
+        show_stext(document)
+
+    for k in (
+            'format',
+            'encryption',
+            'info:Author',
+            'info:Title',
+            'info:Creator',
+            'info:Producer',
+            'qwerty',
+            ):
+        v = document.fz_lookup_metadata(k)
+        log(f'document.fz_lookup_metadata() k={k} returned v={v!r}')
+        if k == 'qwerty':
+            assert v is None, f'v={v!r}'
+        else:
+            pass
+
+    zoom = 10
+    scale = mupdf.FzMatrix.fz_scale(zoom/100., zoom/100.)
+    page_number = 0
+    log(f'Have created scale: a={scale.a} b={scale.b} c={scale.c} d={scale.d} e={scale.e} f={scale.f}')
+
+    colorspace = mupdf.FzColorspace(mupdf.FzColorspace.Fixed_RGB)
+    log(f'colorspace.m_internal.key_storable.storable.refs={colorspace.m_internal.key_storable.storable.refs!r}')
+    if 0:
+        c = colorspace.fz_clamp_color([3.14])
+        log('colorspace.clamp_color returned c={c}')
+    pixmap = mupdf.FzPixmap(document, page_number, scale, colorspace, 0)
+    log(f'Have created pixmap: {pixmap.m_internal.w} {pixmap.m_internal.h} {pixmap.m_internal.stride} {pixmap.m_internal.n}')
+
+    filename = f'mupdf_test-out1-{g_test_n}.png'
+    pixmap.fz_save_pixmap_as_png(filename)
+    log(f'Have created {filename} using pixmap.save_pixmap_as_png().')
+
+    # Print image data in ascii PPM format. Copied from
+    # mupdf/docs/examples/example.c.
+    #
+    samples = pixmap.samples()
+    stride = pixmap.stride()
+    n = pixmap.n()
+    filename = f'mupdf_test-out2-{g_test_n}.ppm'
+    with open(filename, 'w') as f:
+        f.write('P3\n')
+        f.write('%s %s\n' % (pixmap.m_internal.w, pixmap.m_internal.h))
+        f.write('255\n')
+        for y in range(0, pixmap.m_internal.h):
+            for x in range(pixmap.m_internal.w):
+                if x:
+                    f.write('  ')
+                offset = y * stride + x * n
+                if hasattr(mupdf, 'bytes_getitem'):
+                    # swig
+                    f.write('%3d %3d %3d' % (
+                            mupdf.bytes_getitem(samples, offset + 0),
+                            mupdf.bytes_getitem(samples, offset + 1),
+                            mupdf.bytes_getitem(samples, offset + 2),
+                            ))
+                else:
+                    # cppyy
+                    f.write('%3d %3d %3d' % (
+                            samples[offset + 0],
+                            samples[offset + 1],
+                            samples[offset + 2],
+                            ))
+            f.write('\n')
+    log(f'Have created {filename} by scanning pixmap.')
+
+    # Generate .png and but create Pixmap from Page instead of from Document.
+    #
+    page = mupdf.FzPage(document, 0)
+    separations = page.fz_page_separations()
+    log(f'page_separations() returned {"true" if separations else "false"}')
+    pixmap = mupdf.FzPixmap(page, scale, colorspace, 0)
+    filename = f'mupdf_test-out3-{g_test_n}.png'
+    pixmap.fz_save_pixmap_as_png(filename)
+    log(f'Have created {filename} using pixmap.fz_save_pixmap_as_png()')
+
+    # Show links
+    log(f'Links.')
+    page = mupdf.FzPage(document, 0)
+    link = mupdf.fz_load_links(page);
+    log(f'{link}')
+    if link:
+        for i in link:
+            log(f'{i}')
+
+    # Check we can iterate over Link's, by creating one manually.
+    #
+    link = mupdf.FzLink(mupdf.FzRect(0, 0, 1, 1), "hello")
+    log(f'items in <link> are:')
+    for i in link:
+        log(f'    {i.m_internal.refs} {i.m_internal.uri}')
+
+    # Check iteration over Outlines. We do depth-first iteration.
+    #
+    log(f'Outlines.')
+    def olog(text):
+        if 0:
+            log(text)
+    num_outline_items = 0
+    depth = 0
+    it = mupdf.FzOutlineIterator(document)
+    while 1:
+        item = it.fz_outline_iterator_item()
+        olog(f'depth={depth} valid={item.valid()}')
+        if item.valid():
+            log(f'{" "*depth*4}uri={item.uri()} is_open={item.is_open()} title={item.title()}')
+            num_outline_items += 1
+        else:
+            olog(f'{" "*depth*4}<null>')
+        r = it.fz_outline_iterator_down()
+        olog(f'depth={depth} down => {r}')
+        if r >= 0:
+            depth += 1
+        if r < 0:
+            r = it.fz_outline_iterator_next()
+            olog(f'depth={depth} next => {r}')
+            assert r
+            if r:
+                # No more items at current depth, so repeatedly go up until we
+                # can go right.
+                end = 0
+                while 1:
+                    r = it.fz_outline_iterator_up()
+                    olog(f'depth={depth} up => {r}')
+                    if r < 0:
+                        # We are at EOF. Need to break out of top-level loop.
+                        end = 1
+                        break
+                    depth -= 1
+                    r = it.fz_outline_iterator_next()
+                    olog(f'depth={depth} next => {r}')
+                    if r == 0:
+                        # There are items at this level.
+                        break
+                if end:
+                    break
+    log(f'num_outline_items={num_outline_items}')
+
+    # Check iteration over StextPage.
+    #
+    log(f'StextPage.')
+    stext_options = mupdf.FzStextOptions(0)
+    page_num = 40
+    try:
+        stext_page = mupdf.FzStextPage(document, page_num, stext_options)
+    except Exception:
+        log(f'no page_num={page_num}')
+    else:
+        device_stext = mupdf.FzDevice(stext_page, stext_options)
+        matrix = mupdf.FzMatrix()
+        page = mupdf.FzPage(document, 0)
+        cookie = mupdf.FzCookie()
+        page.fz_run_page(device_stext, matrix, cookie)
+        log(f'    stext_page is:')
+        for block in stext_page:
+            log(f'        block:')
+            for line in block:
+                line_text = ''
+                for char in line:
+                    line_text += chr(char.m_internal.c)
+                log(f'            {line_text}')
+
+        device_stext.fz_close_device()
+
+    # Check fz_search_page2().
+    items = mupdf.fz_search_page2(document, 0, "compression", 20)
+    print(f'{len(items)=}')
+    for item in items:
+        print(f'    {item.mark=} {item.quad=}')
+
+    # Check copy-constructor.
+    log(f'Checking copy-constructor')
+    document2 = mupdf.FzDocument(document)
+    del document
+    page = mupdf.FzPage(document2, 0)
+    scale = mupdf.FzMatrix()
+    pixmap = mupdf.FzPixmap(page, scale, colorspace, 0)
+    pixmap.fz_save_pixmap_as_png('mupdf_test-out3.png')
+
+    stdout = mupdf.FzOutput(mupdf.FzOutput.Fixed_STDOUT)
+    log(f'{type(stdout)} {stdout.m_internal.state}')
+
+    mediabox = page.fz_bound_page()
+    out = mupdf.FzDocumentWriter(filename, 'png', '', mupdf.FzDocumentWriter.FormatPathType_DOCUMENT)
+    dev = out.fz_begin_page(mediabox)
+    page.fz_run_page(dev, mupdf.FzMatrix(mupdf.fz_identity), mupdf.FzCookie())
+    out.fz_end_page()
+
+    # Check out-params are converted into python return value.
+    bitmap = mupdf.FzBitmap(10, 20, 8, 72, 72)
+    bitmap_details = bitmap.fz_bitmap_details()
+    log(f'{bitmap_details}')
+    assert list(bitmap_details) == [10, 20, 8, 12], f'bitmap_details={bitmap_details!r}'
+
+    log(f'finished test of %s' % path)
+
+
+if __name__ == '__main__':
+
+    print(f'{mupdf.Py_LIMITED_API=}', flush=1)
+    paths = sys.argv[1:]
+    if not paths:
+        paths = [
+                f'{g_mupdf_root}/thirdparty/zlib/zlib.3.pdf',
+                ]
+    # Run test() on all the .pdf files in the mupdf repository.
+    #
+    for path in paths:
+
+        log_prefix_set(f'{os.path.relpath(path, g_mupdf_root)}: ')
+        try:
+            test(path)
+        finally:
+            log_prefix_set('')
+
+    log(f'finished')
author	Franz Glasner <fzglas.hg@dom66.de>
date	Mon, 15 Sep 2025 11:44:09 +0200
parents	b50eed0cc0ef
children