diff tests/test_2548.py @ 1:1d09e1dec1d9 upstream

ADD: PyMuPDF v1.26.4: the original sdist. It does not yet contain MuPDF. This normally will be downloaded when building PyMuPDF.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:37:51 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test_2548.py	Mon Sep 15 11:37:51 2025 +0200
@@ -0,0 +1,42 @@
+import os
+
+import pymupdf
+
+root = os.path.abspath(f'{__file__}/../..')
+
+def test_2548():
+    """Text extraction should fail because of PDF structure cycle.
+
+    Old MuPDF version did not detect the loop.
+    """
+    print(f'test_2548(): {pymupdf.mupdf_version_tuple=}')
+    pymupdf.TOOLS.mupdf_warnings(reset=True)
+    doc = pymupdf.open(f'{root}/tests/resources/test_2548.pdf')
+    e = False
+    for page in doc:
+        try:
+            _ = page.get_text()
+        except Exception as ee:
+            print(f'test_2548: {ee=}')
+            if hasattr(pymupdf, 'mupdf'):
+                # Rebased.
+                expected = "RuntimeError('code=2: cycle in structure tree')"
+            else:
+                # Classic.
+                expected = "RuntimeError('cycle in structure tree')"
+            assert repr(ee) == expected, f'Expected {expected=} but got {repr(ee)=}.'
+            e = True
+    wt = pymupdf.TOOLS.mupdf_warnings()
+    print(f'test_2548(): {wt=}')
+
+    # This checks that PyMuPDF 1.23.7 fixes this bug, and also that earlier
+    # versions with updated MuPDF also fix the bug.
+    rebased = hasattr(pymupdf, 'mupdf')
+    if pymupdf.mupdf_version_tuple >= (1, 27):
+        expected = 'format error: No common ancestor in structure tree\nstructure tree broken, assume tree is missing'
+        expected = '\n'.join([expected] * 5)
+    else:
+        expected = 'format error: cycle in structure tree\nstructure tree broken, assume tree is missing'
+    if rebased:
+        assert wt == expected, f'expected:\n    {expected!r}\nwt:\n    {wt!r}\n'
+    assert not e