view tests/test_2548.py @ 40:aa33339d6b8a upstream

ADD: MuPDF v1.26.10: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.5.
author Franz Glasner <fzglas.hg@dom66.de>
date Sat, 11 Oct 2025 11:31:38 +0200
parents 1d09e1dec1d9
children
line wrap: on
line source

import os

import pymupdf

# Directory that contains the ``tests/`` folder: two levels up from this file.
root = os.path.abspath(os.path.join(__file__, '..', '..'))

def test_2548():
    """Check that a PDF with a broken structure tree can still be read.

    ``tests/resources/test_2548.pdf`` has a structure tree that MuPDF
    rejects: a cycle is reported before MuPDF 1.27, a missing common
    ancestor from 1.27 on.  ``page.get_text()`` must not raise for any
    page; the problem is expected to surface only as MuPDF warnings,
    whose exact text is verified on rebased builds.
    """
    print(f'test_2548(): {pymupdf.mupdf_version_tuple=}')
    # Rebased builds expose ``pymupdf.mupdf``; classic builds do not.
    rebased = hasattr(pymupdf, 'mupdf')
    # Reset the warnings buffer so that only this test's warnings are seen.
    pymupdf.TOOLS.mupdf_warnings(reset=True)
    e = False
    # The context manager closes the document even if an assertion fails.
    with pymupdf.open(f'{root}/tests/resources/test_2548.pdf') as doc:
        for page in doc:
            try:
                _ = page.get_text()
            except Exception as ee:
                # Any exception makes the test fail at the end; the message
                # is still checked so an unexpected error type or text is
                # reported immediately and precisely.
                print(f'test_2548: {ee=}')
                if rebased:
                    # Rebased.
                    expected = "RuntimeError('code=2: cycle in structure tree')"
                else:
                    # Classic.
                    expected = "RuntimeError('cycle in structure tree')"
                assert repr(ee) == expected, f'Expected {expected=} but got {repr(ee)=}.'
                e = True
        # Collect warnings while the document is still open, so closing it
        # cannot change what is captured.
        wt = pymupdf.TOOLS.mupdf_warnings()
    print(f'test_2548(): {wt=}')

    # This checks that PyMuPDF 1.23.7 fixes this bug, and also that earlier
    # versions with updated MuPDF also fix the bug.
    if pymupdf.mupdf_version_tuple >= (1, 27):
        expected = 'format error: No common ancestor in structure tree\nstructure tree broken, assume tree is missing'
        # With MuPDF >= 1.27 the warning pair is expected five times.
        expected = '\n'.join([expected] * 5)
    else:
        expected = 'format error: cycle in structure tree\nstructure tree broken, assume tree is missing'
    if rebased:
        # Warning text is only verified on rebased builds.
        assert wt == expected, f'expected:\n    {expected!r}\nwt:\n    {wt!r}\n'
    assert not e