diff tests/test_toc.py @ 3:2c135c81b16c

MERGE: upstream PyMuPDF 1.26.4 with MuPDF 1.26.7
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:44:09 +0200
parents 1d09e1dec1d9
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test_toc.py	Mon Sep 15 11:44:09 2025 +0200
@@ -0,0 +1,288 @@
+"""
+* Verify equality of generated TOCs and expected results.
+* Verify TOC deletion works
+* Verify manipulation of single TOC item works
+* Verify stability against circular TOC items
+"""
+
+import os
+import sys
+import pymupdf
+import pathlib
+
+scriptdir = os.path.abspath(os.path.dirname(__file__))
+filename = os.path.join(scriptdir, "resources", "001003ED.pdf")
+filename2 = os.path.join(scriptdir, "resources", "2.pdf")
+circular = os.path.join(scriptdir, "resources", "circular-toc.pdf")
+full_toc = os.path.join(scriptdir, "resources", "full_toc.txt")
+simple_toc = os.path.join(scriptdir, "resources", "simple_toc.txt")
+file_3820 = os.path.join(scriptdir, "resources", "test-3820.pdf")
+doc = pymupdf.open(filename)
+
+
+def test_simple_toc():
+    simple_lines = open(simple_toc, "rb").read()
+    toc = b"".join([str(t).encode() for t in doc.get_toc(True)])
+    assert toc == simple_lines
+
+
+def test_full_toc():
+    if not hasattr(pymupdf, "mupdf"):
+        # Classic implementation does not have fix for this test.
+        print(f"Not running test_full_toc on classic implementation.")
+        return
+    expected_path = f"{scriptdir}/resources/full_toc.txt"
+    expected = pathlib.Path(expected_path).read_bytes()
+    # Github windows x32 seems to insert \r characters; maybe something to
+    # do with the Python installation's line endings settings.
+    expected = expected.decode("utf8")
+    expected = expected.replace('\r', '')
+    toc = "\n".join([str(t) for t in doc.get_toc(False)])
+    toc += "\n"
+    assert toc == expected
+
+
+def test_erase_toc():
+    doc.set_toc([])
+    assert doc.get_toc() == []
+
+
+def test_replace_toc():
+    toc = doc.get_toc(False)
+    doc.set_toc(toc)
+
+
+def test_setcolors():
+    doc = pymupdf.open(filename2)
+    toc = doc.get_toc(False)
+    for i in range(len(toc)):
+        d = toc[i][3]
+        d["color"] = (1, 0, 0)
+        d["bold"] = True
+        d["italic"] = True
+        doc.set_toc_item(i, dest_dict=d)
+
+    toc2 = doc.get_toc(False)
+    assert len(toc2) == len(toc)
+
+    for t in toc2:
+        d = t[3]
+        assert d["bold"]
+        assert d["italic"]
+        assert d["color"] == (1, 0, 0)
+
+
+def test_circular():
+    """The test file contains circular bookmarks."""
+    doc = pymupdf.open(circular)
+    toc = doc.get_toc(False)  # this must not loop
+    rebased = hasattr(pymupdf, 'mupdf')
+    if rebased:
+        wt = pymupdf.TOOLS.mupdf_warnings()
+        assert wt == 'Bad or missing prev pointer in outline tree, repairing', \
+                f'{wt=}'
+
+def test_2355():
+    
+    # Create a test PDF with toc.
+    doc = pymupdf.Document()
+    for _ in range(10):
+        doc.new_page(doc.page_count)
+    doc.set_toc([[1, 'test', 1], [1, 'test2', 5]])
+    
+    path = 'test_2355.pdf'
+    doc.save(path)
+
+    # Open many times
+    for i in range(10):
+        with pymupdf.open(path) as new_doc:
+            new_doc.get_toc()
+
+    # Open once and read many times
+    with pymupdf.open(path) as new_doc:
+        for i in range(10):
+            new_doc.get_toc()
+
+def test_2788():
+    '''
+    Check handling of Document.get_toc() when toc item has kind=4.
+
+    The full TOC of the fixture is compared with a hard-coded expected
+    entry, all page links are read, and finally the accumulated MuPDF
+    warnings are compared with the expected repair messages.
+    '''
+    if not hasattr(pymupdf, 'mupdf'):
+        # Classic implementation does not have fix for this test.
+        print(f'Not running test_2788 on classic implementation.')
+        return
+    path = os.path.abspath(f'{__file__}/../../tests/resources/test_2788.pdf')        
+    document = pymupdf.open(path)
+    # Expected full TOC: a single level-1 entry whose destination dict has kind 4.
+    toc0 = [[1, 'page2', 2, {'kind': 4, 'xref': 14, 'page': 1, 'to': pymupdf.Point(100.0, 760.0), 'zoom': 0.0, 'nameddest': 'page.2'}]]
+    toc1 = document.get_toc(simple=False)
+    print(f'{toc0=}')
+    print(f'{toc1=}')
+    assert toc1 == toc0
+    
+    # NOTE(review): `doc` is the module-level shared document, not the local
+    # `document`, so `document` is not modified between toc1 and toc2 and the
+    # second assertion repeats the first. `document.set_toc(toc0)` looks like
+    # the intent - confirm before changing, the expected values below may
+    # depend on the current behavior.
+    doc.set_toc(toc0)
+    toc2 = document.get_toc(simple=False)
+    print(f'{toc0=}')
+    print(f'{toc2=}')
+    assert toc2 == toc0
+    
+    # Also test Page.get_links() bugfix from #2817.
+    for page in document:
+        page.get_links()
+    # Always true at this point because of the early return above.
+    rebased = hasattr(pymupdf, 'mupdf')
+    if rebased:
+        wt = pymupdf.TOOLS.mupdf_warnings()
+        assert wt == (
+                "syntax error: expected 'obj' keyword (0 3 ?)\n"
+                "trying to repair broken xref\n"
+                "repairing PDF document"
+                ), f'{wt=}'
+
+
+def test_toc_count():
+    file_in = os.path.abspath(f'{__file__}/../../tests/resources/test_toc_count.pdf')
+    file_out = os.path.abspath(f'{__file__}/../../tests/test_toc_count_out.pdf')
+
+    def get(doc):
+        outlines = doc.xref_get_key(doc.pdf_catalog(), "Outlines")
+        ret = doc.xref_object(int(outlines[1].split()[0]))
+        return ret
+    print()
+    with pymupdf.open(file_in) as doc:
+        print(f'1: {get(doc)}')
+        toc = doc.get_toc(simple=False)
+        doc.set_toc([])
+        #print(f'2: {get(doc)}')
+        doc.set_toc(toc)
+        print(f'3: {get(doc)}')
+        doc.save(file_out, garbage=4)
+    with pymupdf.open(file_out) as doc:
+        print(f'4: {get(doc)}')
+    pymupdf._log_items_clear()
+
+
+def test_3347():
+    '''
+    Check fix for #3347 - link destination rectangles when source/destination
+    pages have different sizes.
+    '''
+    doc = pymupdf.open()
+    doc.new_page(width=500, height=800)
+    doc.new_page(width=800, height=500)
+    rects = [
+        (0, pymupdf.Rect(10, 20, 50, 40), pymupdf.utils.getColor('red')),
+        (0, pymupdf.Rect(300, 350, 400, 450), pymupdf.utils.getColor('green')),
+        (1, pymupdf.Rect(20, 30, 40, 50), pymupdf.utils.getColor('blue')),
+        (1, pymupdf.Rect(350, 300, 450, 400), pymupdf.utils.getColor('black'))
+    ]
+
+    for page, rect, color in rects:
+        doc[page].draw_rect(rect, color=color)
+
+    for (from_page, from_rect, _), (to_page, to_rect, _) in zip(rects, rects[1:] + rects[:1]):
+        doc[from_page].insert_link({
+            'kind': 1,
+            'from': from_rect,
+            'page': to_page,
+            'to': to_rect.top_left,
+        })
+
+    links_expected = [
+            (0, {'kind': 1, 'xref': 11, 'from': pymupdf.Rect(10.0, 20.0, 50.0, 40.0), 'page': 0, 'to': pymupdf.Point(300.0, 350.0), 'zoom': 0.0, 'id': 'fitz-L0'}),
+            (0, {'kind': 1, 'xref': 12, 'from': pymupdf.Rect(300.0, 350.0, 400.0, 450.0), 'page': 1, 'to': pymupdf.Point(20.0, 30.0), 'zoom': 0.0, 'id': 'fitz-L1'}),
+            (1, {'kind': 1, 'xref': 13, 'from': pymupdf.Rect(20.0, 30.0, 40.0, 50.0), 'page': 1, 'to': pymupdf.Point(350.0, 300.0), 'zoom': 0.0, 'id': 'fitz-L0'}),
+            (1, {'kind': 1, 'xref': 14, 'from': pymupdf.Rect(350.0, 300.0, 450.0, 400.0), 'page': 0, 'to': pymupdf.Point(10.0, 20.0), 'zoom': 0.0, 'id': 'fitz-L1'}),
+            ]
+
+    path = os.path.normpath(f'{__file__}/../../tests/test_3347_out.pdf')
+    doc.save(path)
+    print(f'Have saved to {path=}.')
+
+    links_actual = list()
+    for page_i, page in enumerate(doc):
+        links = page.get_links()
+        for link_i, link in enumerate(links):
+            print(f'{page_i=} {link_i=}: {link!r}')
+            links_actual.append( (page_i, link) )
+    
+    assert links_actual == links_expected
+
+
+def test_3400():
+    '''
+    Check fix for #3400 - link destination rectangles when source/destination
+    pages have different rotations.
+    '''
+    width = 750
+    height = 1110
+    circle_middle_point = pymupdf.Point(height / 4, width / 4)
+    print(f'{circle_middle_point=}')
+    with pymupdf.open() as doc:
+        
+        page = doc.new_page(width=width, height=height)
+        page.set_rotation(270)
+        # draw a circle at the middle point to facilitate debugging
+        page.draw_circle(circle_middle_point, color=(0, 0, 1), radius=5, width=2)
+        
+        for i in range(10):
+            for j in range(10):
+                x = i/10 * width
+                y = j/10 * height
+                page.draw_circle(pymupdf.Point(x, y), color=(0,0,0), radius=0.2, width=0.1)
+                page.insert_htmlbox(pymupdf.Rect(x, y, x+width/10, y+height/20), f'<small><small><small><small>({x=:.1f},{y=:.1f})</small></small></small></small>', )
+
+        # rotate the middle point by the page rotation for the new toc entry
+        toc_link_coords = circle_middle_point
+        print(f'{toc_link_coords=}')
+        
+        toc = [
+            (
+                1,
+                "Link to circle",
+                1,
+                {
+                    "kind": pymupdf.LINK_GOTO,
+                    "page": 1,
+                    "to": toc_link_coords,
+                    "from": pymupdf.Rect(0, 0, height / 4, width / 4),
+                },
+            )
+        ]
+        doc.set_toc(toc, 0)  # set the toc
+        
+        page = doc.new_page(width=200, height=300)
+        from_rect = pymupdf.Rect(10, 10, 100, 50)
+        page.insert_htmlbox(from_rect, 'link')
+        link = dict()
+        link['from'] = from_rect
+        link['kind'] = pymupdf.LINK_GOTO
+        link['to'] = toc_link_coords
+        link['page'] = 0
+        page.insert_link(link)
+        
+        path = os.path.normpath(f'{__file__}/../../tests/test_3400.pdf')
+        doc.save(path)
+        print(f'Saved to {path=}.')
+        
+        links_expected = [
+                (1, {'kind': 1, 'xref': 1120, 'from': pymupdf.Rect(10.0, 10.0, 100.0, 50.0), 'page': 0, 'to': pymupdf.Point(187.5, 472.5), 'zoom': 0.0, 'id': 'fitz-L0'})
+                ]
+
+        links_actual = list()
+        for page_i, page in enumerate(doc):
+            links = page.get_links()
+            for link_i, link in enumerate(links):
+                print(f'({page_i}, {link!r})')
+                links_actual.append( (page_i, link) )
+    
+        assert links_actual == links_expected
+
+
+
+def test_3820():
+    """Ensure all extended TOC items point to pages."""
+    doc = pymupdf.open(file_3820)
+    toc = doc.get_toc(simple=False)
+    for _, _, epage, dest in toc:
+        assert epage == dest["page"] + 1
+
+