Mercurial > hgrepos > Python2 > PyMuPDF
view tests/test_toc.py @ 1:1d09e1dec1d9 upstream
ADD: PyMuPDF v1.26.4: the original sdist.
It does not yet contain MuPDF. This normally will be downloaded when
building PyMuPDF.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:37:51 +0200 |
| parents | |
| children |
line wrap: on
line source
"""
* Verify equality of generated TOCs and expected results.
* Verify TOC deletion works
* Verify manipulation of single TOC item works
* Verify stability against circular TOC items
"""

import os
import sys

import pymupdf
import pathlib

scriptdir = os.path.abspath(os.path.dirname(__file__))
filename = os.path.join(scriptdir, "resources", "001003ED.pdf")
filename2 = os.path.join(scriptdir, "resources", "2.pdf")
circular = os.path.join(scriptdir, "resources", "circular-toc.pdf")
full_toc = os.path.join(scriptdir, "resources", "full_toc.txt")
simple_toc = os.path.join(scriptdir, "resources", "simple_toc.txt")
file_3820 = os.path.join(scriptdir, "resources", "test-3820.pdf")

# Module-level document shared by several tests below.
# NOTE(review): test_erase_toc() mutates this object, so tests that read its
# TOC appear to depend on execution order - confirm before reordering tests.
doc = pymupdf.open(filename)


def test_simple_toc():
    """Compare the simple TOC of the shared document with simple_toc.txt."""
    # read_bytes() closes the file, unlike a bare open(...).read().
    simple_lines = pathlib.Path(simple_toc).read_bytes()
    toc = b"".join(str(t).encode() for t in doc.get_toc(True))
    assert toc == simple_lines


def test_full_toc():
    """Compare the extended TOC of the shared document with full_toc.txt."""
    if not hasattr(pymupdf, "mupdf"):
        # Classic implementation does not have fix for this test.
        print("Not running test_full_toc on classic implementation.")
        return
    expected = pathlib.Path(full_toc).read_bytes()
    # Github windows x32 seems to insert \r characters; maybe something to
    # do with the Python installation's line endings settings.
    expected = expected.decode("utf8")
    expected = expected.replace('\r', '')
    toc = "\n".join([str(t) for t in doc.get_toc(False)])
    toc += "\n"
    assert toc == expected


def test_erase_toc():
    """Setting an empty TOC must leave the document without TOC items."""
    doc.set_toc([])
    assert doc.get_toc() == []


def test_replace_toc():
    """Write the document's own extended TOC back via set_toc()."""
    toc = doc.get_toc(False)
    doc.set_toc(toc)


def test_setcolors():
    """Set color, bold and italic on every TOC item and read them back."""
    doc = pymupdf.open(filename2)
    toc = doc.get_toc(False)
    for i, item in enumerate(toc):
        d = item[3]
        d["color"] = (1, 0, 0)
        d["bold"] = True
        d["italic"] = True
        doc.set_toc_item(i, dest_dict=d)

    toc2 = doc.get_toc(False)
    assert len(toc2) == len(toc)

    for t in toc2:
        d = t[3]
        assert d["bold"]
        assert d["italic"]
        assert d["color"] == (1, 0, 0)


def test_circular():
    """The test file contains circular bookmarks."""
    doc = pymupdf.open(circular)
    doc.get_toc(False)  # this must not loop
    rebased = hasattr(pymupdf, 'mupdf')
    if rebased:
        wt = pymupdf.TOOLS.mupdf_warnings()
        assert wt == 'Bad or missing prev pointer in outline tree, repairing', \
                f'{wt=}'


def test_2355():
    """Save a document with a TOC, then reopen it and read the TOC repeatedly."""
    # Create a test PDF with toc.
    doc = pymupdf.Document()
    for _ in range(10):
        doc.new_page(doc.page_count)
    doc.set_toc([[1, 'test', 1], [1, 'test2', 5]])

    path = 'test_2355.pdf'
    doc.save(path)

    # Open many times
    for _ in range(10):
        with pymupdf.open(path) as new_doc:
            new_doc.get_toc()

    # Open once and read many times
    with pymupdf.open(path) as new_doc:
        for _ in range(10):
            new_doc.get_toc()


def test_2788():
    '''
    Check handling of Document.get_toc() when toc item has kind=4.
    '''
    if not hasattr(pymupdf, 'mupdf'):
        # Classic implementation does not have fix for this test.
        print('Not running test_2788 on classic implementation.')
        return
    path = os.path.abspath(f'{__file__}/../../tests/resources/test_2788.pdf')
    document = pymupdf.open(path)
    toc0 = [[1, 'page2', 2, {'kind': 4, 'xref': 14, 'page': 1, 'to': pymupdf.Point(100.0, 760.0), 'zoom': 0.0, 'nameddest': 'page.2'}]]
    toc1 = document.get_toc(simple=False)
    print(f'{toc0=}')
    print(f'{toc1=}')
    assert toc1 == toc0

    # NOTE(review): this writes to the module-level `doc`, not to `document`,
    # so `toc2` below is read from a `document` that was not modified here.
    # Possibly `document.set_toc(toc0)` was intended - confirm before changing,
    # because the expected values below may then differ.
    doc.set_toc(toc0)
    toc2 = document.get_toc(simple=False)
    print(f'{toc0=}')
    print(f'{toc2=}')
    assert toc2 == toc0

    # Also test Page.get_links() bugfix from #2817.
    for page in document:
        page.get_links()
    rebased = hasattr(pymupdf, 'mupdf')
    if rebased:
        wt = pymupdf.TOOLS.mupdf_warnings()
        assert wt == (
                "syntax error: expected 'obj' keyword (0 3 ?)\n"
                "trying to repair broken xref\n"
                "repairing PDF document"
                ), f'{wt=}'


def test_toc_count():
    """Print the Outlines object before and after erasing and restoring the TOC."""
    file_in = os.path.abspath(f'{__file__}/../../tests/resources/test_toc_count.pdf')
    file_out = os.path.abspath(f'{__file__}/../../tests/test_toc_count_out.pdf')

    def get(doc):
        """Return the source of the object referenced by the catalog's Outlines key."""
        outlines = doc.xref_get_key(doc.pdf_catalog(), "Outlines")
        # outlines[1] is split on whitespace; its first token is used as the xref.
        ret = doc.xref_object(int(outlines[1].split()[0]))
        return ret

    print()
    with pymupdf.open(file_in) as doc:
        print(f'1: {get(doc)}')
        toc = doc.get_toc(simple=False)
        doc.set_toc([])
        #print(f'2: {get(doc)}')
        doc.set_toc(toc)
        print(f'3: {get(doc)}')
        doc.save(file_out, garbage=4)
    with pymupdf.open(file_out) as doc:
        print(f'4: {get(doc)}')
    pymupdf._log_items_clear()


def test_3347():
    '''
    Check fix for #3347 - link destination rectangles when source/destination
    pages have different sizes.
    '''
    doc = pymupdf.open()
    doc.new_page(width=500, height=800)
    doc.new_page(width=800, height=500)
    rects = [
        (0, pymupdf.Rect(10, 20, 50, 40), pymupdf.utils.getColor('red')),
        (0, pymupdf.Rect(300, 350, 400, 450), pymupdf.utils.getColor('green')),
        (1, pymupdf.Rect(20, 30, 40, 50), pymupdf.utils.getColor('blue')),
        (1, pymupdf.Rect(350, 300, 450, 400), pymupdf.utils.getColor('black'))
    ]

    for page, rect, color in rects:
        doc[page].draw_rect(rect, color=color)

    # Link each rectangle to the top-left corner of the next one, with the
    # last rectangle linking back to the first.
    for (from_page, from_rect, _), (to_page, to_rect, _) in zip(rects, rects[1:] + rects[:1]):
        doc[from_page].insert_link({
            'kind': 1,
            'from': from_rect,
            'page': to_page,
            'to': to_rect.top_left,
        })

    links_expected = [
        (0, {'kind': 1, 'xref': 11, 'from': pymupdf.Rect(10.0, 20.0, 50.0, 40.0), 'page': 0, 'to': pymupdf.Point(300.0, 350.0), 'zoom': 0.0, 'id': 'fitz-L0'}),
        (0, {'kind': 1, 'xref': 12, 'from': pymupdf.Rect(300.0, 350.0, 400.0, 450.0), 'page': 1, 'to': pymupdf.Point(20.0, 30.0), 'zoom': 0.0, 'id': 'fitz-L1'}),
        (1, {'kind': 1, 'xref': 13, 'from': pymupdf.Rect(20.0, 30.0, 40.0, 50.0), 'page': 1, 'to': pymupdf.Point(350.0, 300.0), 'zoom': 0.0, 'id': 'fitz-L0'}),
        (1, {'kind': 1, 'xref': 14, 'from': pymupdf.Rect(350.0, 300.0, 450.0, 400.0), 'page': 0, 'to': pymupdf.Point(10.0, 20.0), 'zoom': 0.0, 'id': 'fitz-L1'}),
    ]

    path = os.path.normpath(f'{__file__}/../../tests/test_3347_out.pdf')
    doc.save(path)
    print(f'Have saved to {path=}.')

    links_actual = []
    for page_i, page in enumerate(doc):
        links = page.get_links()
        for link_i, link in enumerate(links):
            print(f'{page_i=} {link_i=}: {link!r}')
            links_actual.append((page_i, link))

    assert links_actual == links_expected


def test_3400():
    '''
    Check fix for #3400 - link destination rectangles when source/destination
    pages have different rotations.
    '''
    width = 750
    height = 1110
    circle_middle_point = pymupdf.Point(height / 4, width / 4)
    print(f'{circle_middle_point=}')
    with pymupdf.open() as doc:

        page = doc.new_page(width=width, height=height)
        page.set_rotation(270)
        # draw a circle at the middle point to facilitate debugging
        page.draw_circle(circle_middle_point, color=(0, 0, 1), radius=5, width=2)

        # Draw a 10x10 grid of small labelled circles over the page.
        for i in range(10):
            for j in range(10):
                x = i/10 * width
                y = j/10 * height
                page.draw_circle(pymupdf.Point(x, y), color=(0,0,0), radius=0.2, width=0.1)
                page.insert_htmlbox(pymupdf.Rect(x, y, x+width/10, y+height/20), f'<small><small><small><small>({x=:.1f},{y=:.1f})</small></small></small></small>', )

        # The middle point is used unchanged as the destination of the new
        # toc entry; no rotation is applied to it here.
        toc_link_coords = circle_middle_point
        print(f'{toc_link_coords=}')

        toc = [
            (
                1,
                "Link to circle",
                1,
                {
                    "kind": pymupdf.LINK_GOTO,
                    "page": 1,
                    "to": toc_link_coords,
                    "from": pymupdf.Rect(0, 0, height / 4, width / 4),
                },
            )
        ]
        doc.set_toc(toc, 0)  # set the toc

        page = doc.new_page(width=200, height=300)
        from_rect = pymupdf.Rect(10, 10, 100, 50)
        page.insert_htmlbox(from_rect, 'link')
        link = {
            'from': from_rect,
            'kind': pymupdf.LINK_GOTO,
            'to': toc_link_coords,
            'page': 0,
        }
        page.insert_link(link)

        path = os.path.normpath(f'{__file__}/../../tests/test_3400.pdf')
        doc.save(path)
        print(f'Saved to {path=}.')

        links_expected = [
            (1, {'kind': 1, 'xref': 1120, 'from': pymupdf.Rect(10.0, 10.0, 100.0, 50.0), 'page': 0, 'to': pymupdf.Point(187.5, 472.5), 'zoom': 0.0, 'id': 'fitz-L0'})
        ]

        links_actual = []
        for page_i, page in enumerate(doc):
            links = page.get_links()
            for link_i, link in enumerate(links):
                print(f'({page_i}, {link!r})')
                links_actual.append((page_i, link))

        assert links_actual == links_expected


def test_3820():
    """Ensure all extended TOC items point to pages."""
    doc = pymupdf.open(file_3820)
    toc = doc.get_toc(simple=False)
    for _, _, epage, dest in toc:
        # The item's page number must equal the destination's "page" plus one.
        assert epage == dest["page"] + 1
