comparison tests/test_toc.py @ 3:2c135c81b16c

MERGE: upstream PyMuPDF 1.26.4 with MuPDF 1.26.7
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:44:09 +0200
parents 1d09e1dec1d9
children
comparison
equal deleted inserted replaced
0:6015a75abc2d 3:2c135c81b16c
1 """
2 * Verify equality of generated TOCs and expected results.
3 * Verify TOC deletion works
4 * Verify manipulation of single TOC item works
5 * Verify stability against circular TOC items
6 """
7
8 import os
9 import sys
10 import pymupdf
11 import pathlib
12
# Directory containing this test module; all fixtures live in its
# "resources" sub-directory.
scriptdir = os.path.abspath(os.path.dirname(__file__))
_resources = os.path.join(scriptdir, "resources")

# Input PDFs and reference TOC dumps used by the tests in this module.
filename = os.path.join(_resources, "001003ED.pdf")
filename2 = os.path.join(_resources, "2.pdf")
circular = os.path.join(_resources, "circular-toc.pdf")
full_toc = os.path.join(_resources, "full_toc.txt")
simple_toc = os.path.join(_resources, "simple_toc.txt")
file_3820 = os.path.join(_resources, "test-3820.pdf")

# Shared document, opened once at import time.  Several tests read from it
# and at least one (test_erase_toc) modifies its TOC in place.
doc = pymupdf.open(filename)
21
22
def test_simple_toc():
    """Compare the simple TOC of the shared document with the reference file.

    Each TOC item is converted with ``str()``, encoded to bytes and the
    pieces are concatenated without a separator; the result must equal the
    raw bytes stored in ``simple_toc``.
    """
    # Read the reference inside a context manager so the file handle is
    # closed deterministically instead of being left to the garbage collector.
    with open(simple_toc, "rb") as reference:
        simple_lines = reference.read()
    toc = b"".join([str(t).encode() for t in doc.get_toc(True)])
    assert toc == simple_lines
27
28
def test_full_toc():
    """Compare the extended TOC of the shared document with the reference text.

    The test is skipped (with a printed notice) when ``pymupdf`` has no
    ``mupdf`` attribute, i.e. on the classic implementation.
    """
    rebased = hasattr(pymupdf, "mupdf")
    if not rebased:
        # Classic implementation does not have fix for this test.
        print("Not running test_full_toc on classic implementation.")
        return

    reference_file = pathlib.Path(f"{scriptdir}/resources/full_toc.txt")
    # Github windows x32 seems to insert \r characters; maybe something to
    # do with the Python installation's line endings settings.  Drop them
    # before comparing.
    expected = reference_file.read_bytes().decode("utf8").replace('\r', '')

    # One line per TOC item, each terminated by a newline (the trailing
    # newline is added unconditionally, also for an empty TOC).
    rendered_items = [str(item) for item in doc.get_toc(False)]
    toc = "\n".join(rendered_items) + "\n"
    assert toc == expected
43
44
def test_erase_toc():
    """Set an empty TOC on the shared document and check nothing remains."""
    doc.set_toc([])
    remaining = doc.get_toc()
    assert remaining == []
48
49
def test_replace_toc():
    """Write the shared document's current extended TOC back to it.

    There is no assertion; the test passes if ``set_toc`` does not raise.
    """
    doc.set_toc(doc.get_toc(False))
53
54
def test_setcolors():
    """Set color, bold and italic on every TOC item and read them back."""
    doc = pymupdf.open(filename2)
    toc = doc.get_toc(False)

    # Update each item's destination dictionary and write it back by index.
    for index, entry in enumerate(toc):
        dest = entry[3]
        dest.update(color=(1, 0, 0), bold=True, italic=True)
        doc.set_toc_item(index, dest_dict=dest)

    toc2 = doc.get_toc(False)
    assert len(toc2) == len(toc)

    # Every item of the re-read TOC must carry the new attributes.
    for entry in toc2:
        dest = entry[3]
        assert dest["bold"]
        assert dest["italic"]
        assert dest["color"] == (1, 0, 0)
73
74
def test_circular():
    """The test file contains circular bookmarks."""
    doc = pymupdf.open(circular)
    doc.get_toc(False)  # this must not loop
    if hasattr(pymupdf, 'mupdf'):
        # Rebased implementation only: check the collected MuPDF warnings.
        expected_warning = 'Bad or missing prev pointer in outline tree, repairing'
        wt = pymupdf.TOOLS.mupdf_warnings()
        assert wt == expected_warning, f'{wt=}'
84
def test_2355():
    """Save a small PDF with a TOC, then read that TOC repeatedly.

    There are no assertions; the test passes if none of the calls raise.
    """
    repetitions = 10

    # Create a test PDF with toc.
    doc = pymupdf.Document()
    for _ in range(10):
        doc.new_page(doc.page_count)
    toc_entries = [[1, 'test', 1], [1, 'test2', 5]]
    doc.set_toc(toc_entries)

    path = 'test_2355.pdf'
    doc.save(path)

    # Open many times
    for _ in range(repetitions):
        with pymupdf.open(path) as new_doc:
            new_doc.get_toc()

    # Open once and read many times
    with pymupdf.open(path) as new_doc:
        for _ in range(repetitions):
            new_doc.get_toc()
105
def test_2788():
    '''
    Check handling of Document.get_toc() when toc item has kind=4.

    Skipped (with a printed notice) when `pymupdf` has no `mupdf` attribute.
    '''
    rebased = hasattr(pymupdf, 'mupdf')
    if not rebased:
        # Classic implementation does not have fix for this test.
        print('Not running test_2788 on classic implementation.')
        return

    path = os.path.abspath(f'{__file__}/../../tests/resources/test_2788.pdf')
    document = pymupdf.open(path)

    # The single extended TOC item the file is expected to contain.
    expected_dest = {
        'kind': 4,
        'xref': 14,
        'page': 1,
        'to': pymupdf.Point(100.0, 760.0),
        'zoom': 0.0,
        'nameddest': 'page.2',
    }
    toc0 = [[1, 'page2', 2, expected_dest]]

    toc1 = document.get_toc(simple=False)
    print(f'{toc0=}')
    print(f'{toc1=}')
    assert toc1 == toc0

    # NOTE(review): this calls set_toc() on the module-level `doc`, not on
    # `document`, so the assertion below re-reads a `document` this function
    # has not modified.  Confirm whether `document.set_toc(toc0)` was meant.
    doc.set_toc(toc0)
    toc2 = document.get_toc(simple=False)
    print(f'{toc0=}')
    print(f'{toc2=}')
    assert toc2 == toc0

    # Also test Page.get_links() bugfix from #2817.
    for page in document:
        page.get_links()

    if rebased:
        expected_warnings = (
            "syntax error: expected 'obj' keyword (0 3 ?)\n"
            "trying to repair broken xref\n"
            "repairing PDF document"
        )
        wt = pymupdf.TOOLS.mupdf_warnings()
        assert wt == expected_warnings, f'{wt=}'
139
140
def test_toc_count():
    """Erase and restore a document's TOC, save it, and print its /Outlines.

    The /Outlines object source is printed before the round trip, after
    restoring the TOC, and after re-opening the saved file.  There are no
    assertions; the test passes if none of the calls raise.
    """
    file_in = os.path.abspath(f'{__file__}/../../tests/resources/test_toc_count.pdf')
    file_out = os.path.abspath(f'{__file__}/../../tests/test_toc_count_out.pdf')

    def outlines_object(pdf):
        # The catalog's "Outlines" value is expected to be a reference whose
        # first whitespace-separated token is the xref number.
        outlines = pdf.xref_get_key(pdf.pdf_catalog(), "Outlines")
        outlines_xref = int(outlines[1].split()[0])
        return pdf.xref_object(outlines_xref)

    print()
    with pymupdf.open(file_in) as pdf_in:
        print(f'1: {outlines_object(pdf_in)}')
        saved_toc = pdf_in.get_toc(simple=False)
        pdf_in.set_toc([])
        #print(f'2: {outlines_object(pdf_in)}')
        pdf_in.set_toc(saved_toc)
        print(f'3: {outlines_object(pdf_in)}')
        pdf_in.save(file_out, garbage=4)

    with pymupdf.open(file_out) as pdf_out:
        print(f'4: {outlines_object(pdf_out)}')

    pymupdf._log_items_clear()
161
162
def test_3347():
    '''
    Check fix for #3347 - link destination rectangles when source/destination
    pages have different sizes.
    '''
    doc = pymupdf.open()
    doc.new_page(width=500, height=800)
    doc.new_page(width=800, height=500)

    # (page number, rectangle, colour) for each marker drawn on the pages.
    get_color = pymupdf.utils.getColor
    rects = [
        (0, pymupdf.Rect(10, 20, 50, 40), get_color('red')),
        (0, pymupdf.Rect(300, 350, 400, 450), get_color('green')),
        (1, pymupdf.Rect(20, 30, 40, 50), get_color('blue')),
        (1, pymupdf.Rect(350, 300, 450, 400), get_color('black')),
    ]

    for page_no, rect, color in rects:
        doc[page_no].draw_rect(rect, color=color)

    # Link every rectangle to the top-left corner of the next one, with the
    # last rectangle wrapping around to the first.
    count = len(rects)
    for index, (from_page, from_rect, _) in enumerate(rects):
        to_page, to_rect, _ = rects[(index + 1) % count]
        link_spec = {
            'kind': 1,
            'from': from_rect,
            'page': to_page,
            'to': to_rect.top_left,
        }
        doc[from_page].insert_link(link_spec)

    links_expected = [
        (0, {'kind': 1, 'xref': 11, 'from': pymupdf.Rect(10.0, 20.0, 50.0, 40.0), 'page': 0, 'to': pymupdf.Point(300.0, 350.0), 'zoom': 0.0, 'id': 'fitz-L0'}),
        (0, {'kind': 1, 'xref': 12, 'from': pymupdf.Rect(300.0, 350.0, 400.0, 450.0), 'page': 1, 'to': pymupdf.Point(20.0, 30.0), 'zoom': 0.0, 'id': 'fitz-L1'}),
        (1, {'kind': 1, 'xref': 13, 'from': pymupdf.Rect(20.0, 30.0, 40.0, 50.0), 'page': 1, 'to': pymupdf.Point(350.0, 300.0), 'zoom': 0.0, 'id': 'fitz-L0'}),
        (1, {'kind': 1, 'xref': 14, 'from': pymupdf.Rect(350.0, 300.0, 450.0, 400.0), 'page': 0, 'to': pymupdf.Point(10.0, 20.0), 'zoom': 0.0, 'id': 'fitz-L1'}),
    ]

    path = os.path.normpath(f'{__file__}/../../tests/test_3347_out.pdf')
    doc.save(path)
    print(f'Have saved to {path=}.')

    # Collect (page index, link dict) pairs in page order.
    links_actual = []
    for page_i, page in enumerate(doc):
        for link_i, link in enumerate(page.get_links()):
            print(f'{page_i=} {link_i=}: {link!r}')
            links_actual.append((page_i, link))

    assert links_actual == links_expected
208
209
def test_3400():
    '''
    Check fix for #3400 - link destination rectangles when source/destination
    pages have different rotations.
    '''
    width = 750
    height = 1110
    circle_middle_point = pymupdf.Point(height / 4, width / 4)
    print(f'{circle_middle_point=}')
    with pymupdf.open() as doc:

        page = doc.new_page(width=width, height=height)
        page.set_rotation(270)
        # draw a circle at the middle point to facilitate debugging
        page.draw_circle(circle_middle_point, color=(0, 0, 1), radius=5, width=2)

        # Draw a 10x10 grid of tiny circles, each labelled with its own
        # coordinates.
        for i in range(10):
            for j in range(10):
                x = i/10 * width
                y = j/10 * height
                grid_point = pymupdf.Point(x, y)
                page.draw_circle(grid_point, color=(0,0,0), radius=0.2, width=0.1)
                label_rect = pymupdf.Rect(x, y, x+width/10, y+height/20)
                label_html = f'<small><small><small><small>({x=:.1f},{y=:.1f})</small></small></small></small>'
                page.insert_htmlbox(label_rect, label_html, )

        # The TOC entry and the link below target the circle's middle point
        # as-is; no rotation transform is applied to it here.
        toc_link_coords = circle_middle_point
        print(f'{toc_link_coords=}')

        toc_dest = {
            "kind": pymupdf.LINK_GOTO,
            "page": 1,
            "to": toc_link_coords,
            "from": pymupdf.Rect(0, 0, height / 4, width / 4),
        }
        toc = [(1, "Link to circle", 1, toc_dest)]
        doc.set_toc(toc, 0)  # set the toc

        # Second page, holding a link back to the point on the first page.
        page = doc.new_page(width=200, height=300)
        from_rect = pymupdf.Rect(10, 10, 100, 50)
        page.insert_htmlbox(from_rect, 'link')
        link = {
            'from': from_rect,
            'kind': pymupdf.LINK_GOTO,
            'to': toc_link_coords,
            'page': 0,
        }
        page.insert_link(link)

        path = os.path.normpath(f'{__file__}/../../tests/test_3400.pdf')
        doc.save(path)
        print(f'Saved to {path=}.')

        links_expected = [
            (1, {'kind': 1, 'xref': 1120, 'from': pymupdf.Rect(10.0, 10.0, 100.0, 50.0), 'page': 0, 'to': pymupdf.Point(187.5, 472.5), 'zoom': 0.0, 'id': 'fitz-L0'})
        ]

        # Collect (page index, link dict) pairs in page order.
        links_actual = []
        for page_i, page in enumerate(doc):
            for link_i, link in enumerate(page.get_links()):
                print(f'({page_i}, {link!r})')
                links_actual.append((page_i, link))

        assert links_actual == links_expected
278
279
280
def test_3820():
    """Ensure all extended TOC items point to pages."""
    doc = pymupdf.open(file_3820)
    extended_toc = doc.get_toc(simple=False)
    # Each item is (level, title, 1-based page, destination dict); the
    # destination's "page" value plus one must match the item's page.
    for _level, _title, page_number, dest in extended_toc:
        assert page_number == dest["page"] + 1
287
288