Mercurial > hgrepos > Python2 > PyMuPDF
comparison tests/test_toc.py @ 3:2c135c81b16c
MERGE: upstream PyMuPDF 1.26.4 with MuPDF 1.26.7
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:44:09 +0200 |
| parents | 1d09e1dec1d9 |
| children |
comparison
equal
deleted
inserted
replaced
| 0:6015a75abc2d | 3:2c135c81b16c |
|---|---|
| 1 """ | |
| 2 * Verify equality of generated TOCs and expected results. | |
| 3 * Verify TOC deletion works | |
| 4 * Verify manipulation of single TOC item works | |
| 5 * Verify stability against circular TOC items | |
| 6 """ | |
| 7 | |
| 8 import os | |
| 9 import sys | |
| 10 import pymupdf | |
| 11 import pathlib | |
| 12 | |
# Directory containing this test module; every fixture path is derived from it.
scriptdir = os.path.abspath(os.path.dirname(__file__))
_resources = os.path.join(scriptdir, "resources")

# Input PDFs and reference TOC dumps used by the tests below.
filename = os.path.join(_resources, "001003ED.pdf")
filename2 = os.path.join(_resources, "2.pdf")
circular = os.path.join(_resources, "circular-toc.pdf")
full_toc = os.path.join(_resources, "full_toc.txt")
simple_toc = os.path.join(_resources, "simple_toc.txt")
file_3820 = os.path.join(_resources, "test-3820.pdf")

# Document opened once at import time and shared by the tests that reference
# the module-level name ``doc``.
doc = pymupdf.open(filename)
| 21 | |
| 22 | |
def test_simple_toc():
    """Compare the simple TOC of the shared document with its reference file.

    Every entry returned by ``doc.get_toc(True)`` is stringified, encoded and
    concatenated; the result must equal the raw bytes stored in ``simple_toc``.
    """
    # Read the reference through a context manager so the file handle is
    # closed deterministically instead of being left to the garbage collector.
    with open(simple_toc, "rb") as reference:
        simple_lines = reference.read()
    toc = b"".join([str(t).encode() for t in doc.get_toc(True)])
    assert toc == simple_lines
| 27 | |
| 28 | |
def test_full_toc():
    """Compare the extended TOC of the shared document with its reference file.

    The test is skipped (with a printed notice) when ``pymupdf`` has no
    ``mupdf`` attribute.
    """
    if not hasattr(pymupdf, "mupdf"):
        # Classic implementation does not have fix for this test.
        print("Not running test_full_toc on classic implementation.")
        return

    reference_file = pathlib.Path(f"{scriptdir}/resources/full_toc.txt")
    # Github windows x32 seems to insert \r characters; maybe something to
    # do with the Python installation's line endings settings. Strip them
    # before comparing.
    expected = reference_file.read_bytes().decode("utf8").replace('\r', '')

    # One stringified entry per line, followed by a trailing newline.
    rendered = [str(entry) for entry in doc.get_toc(False)]
    toc = "\n".join(rendered) + "\n"
    assert toc == expected
| 43 | |
| 44 | |
def test_erase_toc():
    """Setting an empty TOC on the shared document must leave no TOC items."""
    doc.set_toc([])
    remaining = doc.get_toc()
    assert remaining == []
| 48 | |
| 49 | |
def test_replace_toc():
    """Write the shared document's extended TOC back onto itself.

    There is no assertion; the test passes when neither call raises.
    """
    current = doc.get_toc(False)
    doc.set_toc(current)
| 53 | |
| 54 | |
def test_setcolors():
    """Set color, bold and italic on every TOC item and verify they read back."""
    doc = pymupdf.open(filename2)
    toc = doc.get_toc(False)

    # Update each item's destination dictionary and write it back by index.
    for index, item in enumerate(toc):
        dest = item[3]
        dest["color"] = (1, 0, 0)
        dest["bold"] = True
        dest["italic"] = True
        doc.set_toc_item(index, dest_dict=dest)

    toc2 = doc.get_toc(False)
    assert len(toc2) == len(toc)

    # Every item of the re-read TOC must carry the new attributes.
    for item in toc2:
        dest = item[3]
        assert dest["bold"]
        assert dest["italic"]
        assert dest["color"] == (1, 0, 0)
| 73 | |
| 74 | |
def test_circular():
    """The test file contains circular bookmarks."""
    doc = pymupdf.open(circular)
    doc.get_toc(False)  # this must not loop

    # The warning text is only checked when pymupdf exposes ``mupdf``.
    if not hasattr(pymupdf, 'mupdf'):
        return
    wt = pymupdf.TOOLS.mupdf_warnings()
    expected_warning = 'Bad or missing prev pointer in outline tree, repairing'
    assert wt == expected_warning, f'{wt=}'
| 84 | |
def test_2355():
    """Save a ten-page PDF with a two-entry TOC, then read its TOC repeatedly.

    There is no assertion; the test passes when none of the calls raises.
    """
    page_total = 10
    repeats = 10

    # Create a test PDF with toc.
    doc = pymupdf.Document()
    for _ in range(page_total):
        doc.new_page(doc.page_count)
    doc.set_toc([[1, 'test', 1], [1, 'test2', 5]])

    path = 'test_2355.pdf'
    doc.save(path)

    # Open many times, reading the TOC once per open.
    for _ in range(repeats):
        with pymupdf.open(path) as new_doc:
            new_doc.get_toc()

    # Open once and read the TOC many times.
    with pymupdf.open(path) as new_doc:
        for _ in range(repeats):
            new_doc.get_toc()
| 105 | |
def test_2788():
    '''
    Check handling of Document.get_toc() when toc item has kind=4.
    '''
    rebased = hasattr(pymupdf, 'mupdf')
    if not rebased:
        # Classic implementation does not have fix for this test.
        print('Not running test_2788 on classic implementation.')
        return

    path = os.path.abspath(f'{__file__}/../../tests/resources/test_2788.pdf')
    document = pymupdf.open(path)

    # The single TOC entry the test file is expected to contain.
    named_dest = {'kind': 4, 'xref': 14, 'page': 1, 'to': pymupdf.Point(100.0, 760.0), 'zoom': 0.0, 'nameddest': 'page.2'}
    toc0 = [[1, 'page2', 2, named_dest]]

    toc1 = document.get_toc(simple=False)
    print(f'{toc0=}')
    print(f'{toc1=}')
    assert toc1 == toc0

    # NOTE(review): this writes the TOC to the module-level `doc`, not to
    # `document`, while the read-back below uses `document`. It may be
    # unintended; confirm before changing, behaviour is kept as-is.
    doc.set_toc(toc0)
    toc2 = document.get_toc(simple=False)
    print(f'{toc0=}')
    print(f'{toc2=}')
    assert toc2 == toc0

    # Also test Page.get_links() bugfix from #2817.
    for page in document:
        page.get_links()

    # `rebased` is necessarily true here because of the early return above.
    wt = pymupdf.TOOLS.mupdf_warnings()
    expected_warnings = (
        "syntax error: expected 'obj' keyword (0 3 ?)\n"
        "trying to repair broken xref\n"
        "repairing PDF document"
    )
    assert wt == expected_warnings, f'{wt=}'
| 139 | |
| 140 | |
def test_toc_count():
    """Erase, restore and save a TOC, printing the outlines object at each stage.

    There is no assertion; the test passes when none of the calls raises.
    """
    tests_dir = f'{__file__}/../../tests'
    file_in = os.path.abspath(f'{tests_dir}/resources/test_toc_count.pdf')
    file_out = os.path.abspath(f'{tests_dir}/test_toc_count_out.pdf')

    def get(doc):
        # Look up the catalog's "Outlines" key and return the source of the
        # object whose xref number is the first token of the key's value.
        catalog = doc.pdf_catalog()
        outlines = doc.xref_get_key(catalog, "Outlines")
        outlines_xref = int(outlines[1].split()[0])
        return doc.xref_object(outlines_xref)

    print()
    with pymupdf.open(file_in) as doc:
        print(f'1: {get(doc)}')
        saved_toc = doc.get_toc(simple=False)
        doc.set_toc([])
        doc.set_toc(saved_toc)
        print(f'3: {get(doc)}')
        doc.save(file_out, garbage=4)

    # Re-open the saved file and print its outlines object as well.
    with pymupdf.open(file_out) as doc:
        print(f'4: {get(doc)}')
    pymupdf._log_items_clear()
| 161 | |
| 162 | |
def test_3347():
    '''
    Check fix for #3347 - link destination rectangles when source/destination
    pages have different sizes.
    '''
    doc = pymupdf.open()
    doc.new_page(width=500, height=800)
    doc.new_page(width=800, height=500)

    # (page number, rectangle, colour) for two rectangles on each page.
    color_of = pymupdf.utils.getColor
    rects = [
        (0, pymupdf.Rect(10, 20, 50, 40), color_of('red')),
        (0, pymupdf.Rect(300, 350, 400, 450), color_of('green')),
        (1, pymupdf.Rect(20, 30, 40, 50), color_of('blue')),
        (1, pymupdf.Rect(350, 300, 450, 400), color_of('black')),
    ]

    for page_number, rect, color in rects:
        doc[page_number].draw_rect(rect, color=color)

    # Link each rectangle to the top-left corner of the next one, wrapping
    # from the last rectangle back to the first.
    targets = rects[1:] + rects[:1]
    for source, target in zip(rects, targets):
        from_page, from_rect, _ = source
        to_page, to_rect, _ = target
        doc[from_page].insert_link({
            'kind': 1,
            'from': from_rect,
            'page': to_page,
            'to': to_rect.top_left,
        })

    links_expected = [
        (0, {'kind': 1, 'xref': 11, 'from': pymupdf.Rect(10.0, 20.0, 50.0, 40.0), 'page': 0, 'to': pymupdf.Point(300.0, 350.0), 'zoom': 0.0, 'id': 'fitz-L0'}),
        (0, {'kind': 1, 'xref': 12, 'from': pymupdf.Rect(300.0, 350.0, 400.0, 450.0), 'page': 1, 'to': pymupdf.Point(20.0, 30.0), 'zoom': 0.0, 'id': 'fitz-L1'}),
        (1, {'kind': 1, 'xref': 13, 'from': pymupdf.Rect(20.0, 30.0, 40.0, 50.0), 'page': 1, 'to': pymupdf.Point(350.0, 300.0), 'zoom': 0.0, 'id': 'fitz-L0'}),
        (1, {'kind': 1, 'xref': 14, 'from': pymupdf.Rect(350.0, 300.0, 450.0, 400.0), 'page': 0, 'to': pymupdf.Point(10.0, 20.0), 'zoom': 0.0, 'id': 'fitz-L1'}),
    ]

    path = os.path.normpath(f'{__file__}/../../tests/test_3347_out.pdf')
    doc.save(path)
    print(f'Have saved to {path=}.')

    # Collect (page index, link dict) pairs in page order.
    links_actual = []
    for page_i, page in enumerate(doc):
        for link_i, link in enumerate(page.get_links()):
            print(f'{page_i=} {link_i=}: {link!r}')
            links_actual.append((page_i, link))

    assert links_actual == links_expected
| 208 | |
| 209 | |
def test_3400():
    '''
    Check fix for #3400 - link destination rectangles when source/destination
    pages have different rotations.
    '''
    width = 750
    height = 1110
    circle_middle_point = pymupdf.Point(height / 4, width / 4)
    print(f'{circle_middle_point=}')
    with pymupdf.open() as doc:

        page = doc.new_page(width=width, height=height)
        page.set_rotation(270)
        # draw a circle at the middle point to facilitate debugging
        page.draw_circle(circle_middle_point, color=(0, 0, 1), radius=5, width=2)

        # Mark a 10x10 grid of points, each labelled with its coordinates.
        for i in range(10):
            for j in range(10):
                x = i/10 * width
                y = j/10 * height
                grid_point = pymupdf.Point(x, y)
                page.draw_circle(grid_point, color=(0,0,0), radius=0.2, width=0.1)
                label_rect = pymupdf.Rect(x, y, x+width/10, y+height/20)
                page.insert_htmlbox(label_rect, f'<small><small><small><small>({x=:.1f},{y=:.1f})</small></small></small></small>', )

        # The middle point is reused unchanged as the target of both the
        # TOC entry and the link inserted further down.
        toc_link_coords = circle_middle_point
        print(f'{toc_link_coords=}')

        toc_destination = {
            "kind": pymupdf.LINK_GOTO,
            "page": 1,
            "to": toc_link_coords,
            "from": pymupdf.Rect(0, 0, height / 4, width / 4),
        }
        toc = [(1, "Link to circle", 1, toc_destination)]
        doc.set_toc(toc, 0)  # set the toc

        # Second, smaller page holding a link back to page 0.
        page = doc.new_page(width=200, height=300)
        from_rect = pymupdf.Rect(10, 10, 100, 50)
        page.insert_htmlbox(from_rect, 'link')
        link = {
            'from': from_rect,
            'kind': pymupdf.LINK_GOTO,
            'to': toc_link_coords,
            'page': 0,
        }
        page.insert_link(link)

        path = os.path.normpath(f'{__file__}/../../tests/test_3400.pdf')
        doc.save(path)
        print(f'Saved to {path=}.')

        links_expected = [
            (1, {'kind': 1, 'xref': 1120, 'from': pymupdf.Rect(10.0, 10.0, 100.0, 50.0), 'page': 0, 'to': pymupdf.Point(187.5, 472.5), 'zoom': 0.0, 'id': 'fitz-L0'})
        ]

        # Collect (page index, link dict) pairs in page order.
        links_actual = []
        for page_i, page in enumerate(doc):
            for link in page.get_links():
                print(f'({page_i}, {link!r})')
                links_actual.append((page_i, link))

        assert links_actual == links_expected
| 278 | |
| 279 | |
| 280 | |
def test_3820():
    """Ensure all extended TOC items point to pages."""
    doc = pymupdf.open(file_3820)
    for entry in doc.get_toc(simple=False):
        # Each entry unpacks to four fields; the third must be the
        # destination's "page" value plus one.
        _, _, epage, dest = entry
        assert epage == dest["page"] + 1
| 287 | |
| 288 |
