comparison tests/test_textbox.py @ 39:a6bc019ac0b2 upstream

ADD: PyMuPDF v1.26.5: the original sdist.
author Franz Glasner <fzglas.hg@dom66.de>
date Sat, 11 Oct 2025 11:19:58 +0200
parents 1d09e1dec1d9
children
comparison
equal deleted inserted replaced
2:b50eed0cc0ef 39:a6bc019ac0b2
4 2. Basic text output 4 2. Basic text output
5 5
6 Check text is indeed contained in given rectangle. 6 Check text is indeed contained in given rectangle.
7 """ 7 """
8 import pymupdf 8 import pymupdf
9
10 import gentle_compare
11
12 import os
13 import textwrap
9 14
10 # codespell:ignore-begin 15 # codespell:ignore-begin
11 text = """Der Kleine Schwertwal (Pseudorca crassidens), auch bekannt als Unechter oder Schwarzer Schwertwal, ist eine Art der Delfine (Delphinidae) und der einzige rezente Vertreter der Gattung Pseudorca. 16 text = """Der Kleine Schwertwal (Pseudorca crassidens), auch bekannt als Unechter oder Schwarzer Schwertwal, ist eine Art der Delfine (Delphinidae) und der einzige rezente Vertreter der Gattung Pseudorca.
12 17
13 Er ähnelt dem Orca in Form und Proportionen, ist aber einfarbig schwarz und mit einer Maximallänge von etwa sechs Metern deutlich kleiner. 18 Er ähnelt dem Orca in Form und Proportionen, ist aber einfarbig schwarz und mit einer Maximallänge von etwa sechs Metern deutlich kleiner.
180 page = doc.new_page() 185 page = doc.new_page()
181 spare_height, scale = page.insert_htmlbox(rect, text, rotate=rot, scale_low=1) 186 spare_height, scale = page.insert_htmlbox(rect, text, rotate=rot, scale_low=1)
182 assert spare_height < 0 187 assert spare_height < 0
183 assert scale == 1 188 assert scale == 1
184 spare_height, scale = page.insert_htmlbox(rect, text, rotate=rot, scale_low=0) 189 spare_height, scale = page.insert_htmlbox(rect, text, rotate=rot, scale_low=0)
185 assert spare_height == 0 190 page.draw_rect(rect, (1, 0, 0))
191 doc.save(os.path.normpath(f'{__file__}/../../tests/test_htmlbox1.pdf'))
192 assert abs(spare_height - 3.8507) < 0.001
186 assert 0 < scale < 1 193 assert 0 < scale < 1
187 page = doc.reload_page(page) 194 page = doc.reload_page(page)
188 link = page.get_links()[0] # extracts the links on the page 195 link = page.get_links()[0] # extracts the links on the page
189 196
190 assert link["uri"] == "https://www.artifex.com" 197 assert link["uri"] == "https://www.artifex.com"
284 page = document.new_page() 291 page = document.new_page()
285 writer = pymupdf.TextWriter(page.rect) 292 writer = pymupdf.TextWriter(page.rect)
286 text = '111111111' 293 text = '111111111'
287 print(f'Calling writer.fill_textbox().', flush=1) 294 print(f'Calling writer.fill_textbox().', flush=1)
288 writer.fill_textbox(rect=pymupdf.Rect(0, 0, 100, 20), pos=(80, 0), text=text, fontsize=8) 295 writer.fill_textbox(rect=pymupdf.Rect(0, 0, 100, 20), pos=(80, 0), text=text, fontsize=8)
296
297
298 def test_4613():
299 print()
300 text = 3 * 'abcdefghijklmnopqrstuvwxyz\nABCDEFGHIJKLMNOPQRSTUVWXYZ\n'
301 story = pymupdf.Story(text)
302 rect = pymupdf.Rect(10, 10, 100, 100)
303
304 # Test default operation where we get additional scaling down because of
305 # the long words in our text.
306 print(f'test_4613(): ### Testing default operation.')
307 with pymupdf.open() as doc:
308 page = doc.new_page()
309 spare_height, scale = page.insert_htmlbox(rect, story)
310 print(f'test_4613(): {spare_height=} {scale=}')
311 # The additional down-scaling from the long word widths results in
312 # spare vertical space.
313 page.draw_rect(rect, (1, 0, 0))
314 path = os.path.normpath(f'{__file__}/../../tests/test_4613.pdf')
315 doc.save(path)
316
317 path_pixmap = os.path.normpath(f'{__file__}/../../tests/test_4613.png')
318 path_pixmap_expected = os.path.normpath(f'{__file__}/../../tests/resources/test_4613.png')
319 pixmap = page.get_pixmap(dpi=300)
320 pixmap.save(path_pixmap)
321
322 pixmap_diff = gentle_compare.pixmaps_diff(path_pixmap_expected, pixmap)
323 pixmap_diff.save(os.path.normpath(f'{__file__}/../../tests/test_4613-diff.png'))
324
325 rms = gentle_compare.pixmaps_rms(pixmap, path_pixmap_expected)
326 print(f'{rms=}')
327 assert rms == 0, f'{rms=}'
328
329 assert abs(spare_height - 45.7536) < 0.1
330 assert abs(scale - 0.4009) < 0.01
331
332 new_text = page.get_text('text', clip=rect)
333 print(f'test_4613(): new_text:')
334 print(textwrap.indent(new_text, ' '))
335 assert new_text == text
336
337 # Check with _scale_word_width=False - ignore too-wide words.
338 print(f'test_4613(): ### Testing with _scale_word_width=False.')
339 with pymupdf.open() as doc:
340 page = doc.new_page()
341 spare_height, scale = page.insert_htmlbox(rect, story, _scale_word_width=False)
342 print(f'test_4613(): _scale_word_width=False: {spare_height=} {scale=}')
343 # With _scale_word_width=False we allow long words to extend beyond the
344 # rect, so we should have spare_height == 0 and only a small amount of
345 # down-scaling.
346 assert spare_height == 0
347 assert abs(scale - 0.914) < 0.01
348 new_text = page.get_text('text', clip=rect)
349 print(f'test_4613(): new_text:')
350 print(textwrap.indent(new_text, ' '))
351 assert new_text == textwrap.dedent('''
352 abcdefghijklmno
353 ABCDEFGHIJKLM
354 abcdefghijklmno
355 ABCDEFGHIJKLM
356 abcdefghijklmno
357 ABCDEFGHIJKLM
358 ''')[1:]
359
360
361 # Check that we get no fit if scale_low is not low enough.
362 print(f'test_4613(): ### Testing with scale_low too high to allow a fit.')
363 with pymupdf.open() as doc:
364 page = doc.new_page()
365 scale_low=0.6
366 spare_height, scale = page.insert_htmlbox(rect, story, scale_low=scale_low)
367 print(f'test_4613(): {scale_low=}: {spare_height=} {scale=}')
368 assert spare_height == -1
369 assert scale == scale_low