Mercurial > hgrepos > Python2 > PyMuPDF
view tests/test_textbox.py @ 46:7ee69f120f19 default tip
>>>>> tag v1.26.5+1 for changeset b74429b0f5c4
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Sat, 11 Oct 2025 17:17:30 +0200 |
| parents | a6bc019ac0b2 |
| children |
line wrap: on
line source
""" Fill a given text in a rectangle on some PDF page using 1. TextWriter object 2. Basic text output Check text is indeed contained in given rectangle. """ import pymupdf import gentle_compare import os import textwrap # codespell:ignore-begin text = """Der Kleine Schwertwal (Pseudorca crassidens), auch bekannt als Unechter oder Schwarzer Schwertwal, ist eine Art der Delfine (Delphinidae) und der einzige rezente Vertreter der Gattung Pseudorca. Er ähnelt dem Orca in Form und Proportionen, ist aber einfarbig schwarz und mit einer Maximallänge von etwa sechs Metern deutlich kleiner. Kleine Schwertwale bilden Schulen von durchschnittlich zehn bis fünfzig Tieren, wobei sie sich auch mit anderen Delfinen vergesellschaften und sich meistens abseits der Küsten aufhalten. Sie sind in allen Ozeanen gemäßigter, subtropischer und tropischer Breiten beheimatet, sind jedoch vor allem in wärmeren Jahreszeiten auch bis in die gemäßigte bis subpolare Zone südlich der Südspitze Südamerikas, vor Nordeuropa und bis vor Kanada anzutreffen.""" # codespell:ignore-end def test_textbox1(): """Use TextWriter for text insertion.""" doc = pymupdf.open() page = doc.new_page() rect = pymupdf.Rect(50, 50, 400, 400) blue = (0, 0, 1) tw = pymupdf.TextWriter(page.rect, color=blue) tw.fill_textbox( rect, text, align=pymupdf.TEXT_ALIGN_LEFT, fontsize=12, ) tw.write_text(page, morph=(rect.tl, pymupdf.Matrix(1, 1))) # check text containment assert page.get_text() == page.get_text(clip=rect) page.write_text(writers=tw) def test_textbox2(): """Use basic text insertion.""" doc = pymupdf.open() ocg = doc.add_ocg("ocg1") page = doc.new_page() rect = pymupdf.Rect(50, 50, 400, 400) blue = pymupdf.utils.getColor("lightblue") red = pymupdf.utils.getColorHSV("red") page.insert_textbox( rect, text, align=pymupdf.TEXT_ALIGN_LEFT, fontsize=12, color=blue, oc=ocg, ) # check text containment assert page.get_text() == page.get_text(clip=rect) def test_textbox3(): """Use TextWriter for text insertion.""" doc = pymupdf.open() page = doc.new_page() font = pymupdf.Font("cjk") rect = pymupdf.Rect(50, 50, 400, 400) blue = (0, 0, 1) tw = pymupdf.TextWriter(page.rect, color=blue) tw.fill_textbox( rect, text, align=pymupdf.TEXT_ALIGN_LEFT, font=font, fontsize=12, right_to_left=True, ) tw.write_text(page, morph=(rect.tl, pymupdf.Matrix(1, 1))) # check text containment assert page.get_text() == page.get_text(clip=rect) doc.scrub() doc.subset_fonts() def test_textbox4(): """Use TextWriter for text insertion.""" doc = pymupdf.open() ocg = doc.add_ocg("ocg1") page = doc.new_page() rect = pymupdf.Rect(50, 50, 400, 600) blue = (0, 0, 1) tw = pymupdf.TextWriter(page.rect, color=blue) tw.fill_textbox( rect, text, align=pymupdf.TEXT_ALIGN_LEFT, fontsize=12, font=pymupdf.Font("cour"), right_to_left=True, ) tw.write_text(page, oc=ocg, morph=(rect.tl, pymupdf.Matrix(1, 1))) # check text containment assert page.get_text() == page.get_text(clip=rect) def test_textbox5(): """Using basic text insertion.""" small_glyph_heights0 = pymupdf.TOOLS.set_small_glyph_heights() pymupdf.TOOLS.set_small_glyph_heights(True) try: doc = pymupdf.open() page = doc.new_page() r = pymupdf.Rect(100, 100, 150, 150) text = "words and words and words and more words..." rc = -1 fontsize = 12 page.draw_rect(r) while rc < 0: rc = page.insert_textbox( r, text, fontsize=fontsize, align=pymupdf.TEXT_ALIGN_JUSTIFY, ) fontsize -= 0.5 blocks = page.get_text("blocks") bbox = pymupdf.Rect(blocks[0][:4]) assert bbox in r finally: # Must restore small_glyph_heights, otherwise other tests can fail. pymupdf.TOOLS.set_small_glyph_heights(small_glyph_heights0) def test_2637(): """Ensure correct calculation of fitting text.""" doc = pymupdf.open() page = doc.new_page() text = ( "The morning sun painted the sky with hues of orange and pink. " "Birds chirped harmoniously, greeting the new day. " "Nature awakened, filling the air with life and promise." ) rect = pymupdf.Rect(50, 50, 500, 280) fontsize = 50 rc = -1 while rc < 0: # look for largest font size that makes the text fit rc = page.insert_textbox(rect, text, fontname="hebo", fontsize=fontsize) fontsize -= 1 # confirm text won't lap outside rect blocks = page.get_text("blocks") bbox = pymupdf.Rect(blocks[0][:4]) assert bbox in rect def test_htmlbox1(): """Write HTML-styled text into a rect with different rotations. The text is styled and contains a link. Then extract the text again, and - assert that text was written in the 4 different angles, - assert that text properties are correct (bold, italic, color), - assert that the link has been correctly inserted. We try to insert into a rectangle that is too small, setting scale=False and confirming we have a negative return code. """ if not hasattr(pymupdf, "mupdf"): print("'test_htmlbox1' not executed in classic.") return rect = pymupdf.Rect(100, 100, 200, 200) # this only works with scale=True base_text = """Lorem ipsum dolor sit amet, consectetur adipisici elit, sed eiusmod tempor incidunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquid ex ea commodi consequat. Quis aute iure reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint obcaecat cupiditat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.""" text = """Lorem ipsum dolor sit amet, consectetur adipisici elit, sed eiusmod tempor incidunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation <b>ullamco</b> <i>laboris</i> nisi ut aliquid ex ea commodi consequat. Quis aute iure reprehenderit in <span style="color: #0f0;font-weight:bold;">voluptate</span> velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint obcaecat cupiditat non proident, sunt in culpa qui <a href="https://www.artifex.com">officia</a> deserunt mollit anim id est laborum.""" doc = pymupdf.Document() for rot in (0, 90, 180, 270): wdirs = ((1, 0), (0, -1), (-1, 0), (0, 1)) # all writing directions page = doc.new_page() spare_height, scale = page.insert_htmlbox(rect, text, rotate=rot, scale_low=1) assert spare_height < 0 assert scale == 1 spare_height, scale = page.insert_htmlbox(rect, text, rotate=rot, scale_low=0) page.draw_rect(rect, (1, 0, 0)) doc.save(os.path.normpath(f'{__file__}/../../tests/test_htmlbox1.pdf')) assert abs(spare_height - 3.8507) < 0.001 assert 0 < scale < 1 page = doc.reload_page(page) link = page.get_links()[0] # extracts the links on the page assert link["uri"] == "https://www.artifex.com" # Assert plain text is complete. # We must remove line breaks and any ligatures for this. assert base_text == page.get_text(flags=0)[:-1].replace("\n", " ") encounters = 0 # counts the words with selected properties for b in page.get_text("dict")["blocks"]: for l in b["lines"]: wdir = l["dir"] # writing direction assert wdir == wdirs[page.number] for s in l["spans"]: stext = s["text"] color = pymupdf.sRGB_to_pdf(s["color"]) bold = bool(s["flags"] & 16) italic = bool(s["flags"] & 2) if stext in ("ullamco", "laboris", "voluptate"): encounters += 1 if stext == "ullamco": assert bold is True assert italic is False assert color == pymupdf.pdfcolor["black"] elif stext == "laboris": assert bold is False assert italic is True assert color == pymupdf.pdfcolor["black"] elif stext == "voluptate": assert bold is True assert italic is False assert color == pymupdf.pdfcolor["green"] else: assert bold is False assert italic is False # all 3 special special words were encountered assert encounters == 3 def test_htmlbox2(): """Test insertion without scaling""" if not hasattr(pymupdf, "mupdf"): print("'test_htmlbox2' not executed in classic.") return doc = pymupdf.open() rect = pymupdf.Rect(100, 100, 200, 200) # large enough to hold text page = doc.new_page() bottoms = set() for rot in (0, 90, 180, 270): spare_height, scale = page.insert_htmlbox( rect, "Hello, World!", scale_low=1, rotate=rot ) assert scale == 1 assert 0 < spare_height < rect.height bottoms.add(spare_height) assert len(bottoms) == 1 # same result for all rotations def test_htmlbox3(): """Test insertion with opacity""" if not hasattr(pymupdf, "mupdf"): print("'test_htmlbox3' not executed in classic.") return rect = pymupdf.Rect(100, 250, 300, 350) text = """<span style="color:red;font-size:20px;">Just some text.</span>""" doc = pymupdf.open() page = doc.new_page() # insert some text with opacity page.insert_htmlbox(rect, text, opacity=0.5) # lowlevel-extract inserted text to access opacity span = page.get_texttrace()[0] assert span["opacity"] == 0.5 def test_3559(): doc = pymupdf.Document() page = doc.new_page() text_insert="""<body><h3></h3></body>""" rect = pymupdf.Rect(100, 100, 200, 200) page.insert_htmlbox(rect, text_insert) def test_3916(): doc = pymupdf.open() rect = pymupdf.Rect(100, 100, 101, 101) # Too small for the text. page = doc.new_page() spare_height, scale = page.insert_htmlbox(rect, "Hello, World!", scale_low=0.5) assert spare_height == -1 def test_4400(): with pymupdf.open() as document: page = document.new_page() writer = pymupdf.TextWriter(page.rect) text = '111111111' print(f'Calling writer.fill_textbox().', flush=1) writer.fill_textbox(rect=pymupdf.Rect(0, 0, 100, 20), pos=(80, 0), text=text, fontsize=8) def test_4613(): print() text = 3 * 'abcdefghijklmnopqrstuvwxyz\nABCDEFGHIJKLMNOPQRSTUVWXYZ\n' story = pymupdf.Story(text) rect = pymupdf.Rect(10, 10, 100, 100) # Test default operation where we get additional scaling down because of # the long words in our text. print(f'test_4613(): ### Testing default operation.') with pymupdf.open() as doc: page = doc.new_page() spare_height, scale = page.insert_htmlbox(rect, story) print(f'test_4613(): {spare_height=} {scale=}') # The additional down-scaling from the long word widths results in # spare vertical space. page.draw_rect(rect, (1, 0, 0)) path = os.path.normpath(f'{__file__}/../../tests/test_4613.pdf') doc.save(path) path_pixmap = os.path.normpath(f'{__file__}/../../tests/test_4613.png') path_pixmap_expected = os.path.normpath(f'{__file__}/../../tests/resources/test_4613.png') pixmap = page.get_pixmap(dpi=300) pixmap.save(path_pixmap) pixmap_diff = gentle_compare.pixmaps_diff(path_pixmap_expected, pixmap) pixmap_diff.save(os.path.normpath(f'{__file__}/../../tests/test_4613-diff.png')) rms = gentle_compare.pixmaps_rms(pixmap, path_pixmap_expected) print(f'{rms=}') assert rms == 0, f'{rms=}' assert abs(spare_height - 45.7536) < 0.1 assert abs(scale - 0.4009) < 0.01 new_text = page.get_text('text', clip=rect) print(f'test_4613(): new_text:') print(textwrap.indent(new_text, ' ')) assert new_text == text # Check with _scale_word_width=False - ignore too-wide words. print(f'test_4613(): ### Testing with _scale_word_width=False.') with pymupdf.open() as doc: page = doc.new_page() spare_height, scale = page.insert_htmlbox(rect, story, _scale_word_width=False) print(f'test_4613(): _scale_word_width=False: {spare_height=} {scale=}') # With _scale_word_width=False we allow long words to extend beyond the # rect, so we should have spare_height == 0 and only a small amount of # down-scaling. assert spare_height == 0 assert abs(scale - 0.914) < 0.01 new_text = page.get_text('text', clip=rect) print(f'test_4613(): new_text:') print(textwrap.indent(new_text, ' ')) assert new_text == textwrap.dedent(''' abcdefghijklmno ABCDEFGHIJKLM abcdefghijklmno ABCDEFGHIJKLM abcdefghijklmno ABCDEFGHIJKLM ''')[1:] # Check that we get no fit if scale_low is not low enough. print(f'test_4613(): ### Testing with scale_low too high to allow a fit.') with pymupdf.open() as doc: page = doc.new_page() scale_low=0.6 spare_height, scale = page.insert_htmlbox(rect, story, scale_low=scale_low) print(f'test_4613(): {scale_low=}: {spare_height=} {scale=}') assert spare_height == -1 assert scale == scale_low
