Mercurial > hgrepos > Python2 > PyMuPDF
comparison tests/test_textbox.py @ 1:1d09e1dec1d9 upstream
ADD: PyMuPDF v1.26.4: the original sdist.
It does not yet contain MuPDF. This normally will be downloaded when
building PyMuPDF.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:37:51 +0200 |
| parents | |
| children | a6bc019ac0b2 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 1:1d09e1dec1d9 |
|---|---|
| 1 """ | |
| 2 Fill a given text in a rectangle on some PDF page using | |
| 3 1. TextWriter object | |
| 4 2. Basic text output | |
| 5 | |
| 6 Check text is indeed contained in given rectangle. | |
| 7 """ | |
| 8 import pymupdf | |
| 9 | |
# codespell:ignore-begin
# Sample German text used by the text-box tests: four paragraphs, each
# separated from the next by one empty line.
text = "\n\n".join(
    (
        "Der Kleine Schwertwal (Pseudorca crassidens), auch bekannt als Unechter oder Schwarzer Schwertwal, ist eine Art der Delfine (Delphinidae) und der einzige rezente Vertreter der Gattung Pseudorca.",
        "Er ähnelt dem Orca in Form und Proportionen, ist aber einfarbig schwarz und mit einer Maximallänge von etwa sechs Metern deutlich kleiner.",
        "Kleine Schwertwale bilden Schulen von durchschnittlich zehn bis fünfzig Tieren, wobei sie sich auch mit anderen Delfinen vergesellschaften und sich meistens abseits der Küsten aufhalten.",
        "Sie sind in allen Ozeanen gemäßigter, subtropischer und tropischer Breiten beheimatet, sind jedoch vor allem in wärmeren Jahreszeiten auch bis in die gemäßigte bis subpolare Zone südlich der Südspitze Südamerikas, vor Nordeuropa und bis vor Kanada anzutreffen.",
    )
)
# codespell:ignore-end
| 19 | |
def test_textbox1():
    """Fill a rectangle through a TextWriter and check the text stays inside."""
    document = pymupdf.open()
    page = document.new_page()
    box = pymupdf.Rect(50, 50, 400, 400)
    blue = (0, 0, 1)
    writer = pymupdf.TextWriter(page.rect, color=blue)
    writer.fill_textbox(box, text, align=pymupdf.TEXT_ALIGN_LEFT, fontsize=12)

    # Write with a morph anchored at the box's top-left corner.
    morph = (box.tl, pymupdf.Matrix(1, 1))
    writer.write_text(page, morph=morph)

    # Containment check: extraction clipped to the box must equal the
    # unclipped extraction of the whole page.
    whole_page_text = page.get_text()
    clipped_text = page.get_text(clip=box)
    assert whole_page_text == clipped_text

    # Finally exercise the page-level writer entry point as well.
    page.write_text(writers=writer)
| 37 | |
| 38 | |
def test_textbox2():
    """Fill a rectangle with page.insert_textbox() and check containment."""
    document = pymupdf.open()
    layer = document.add_ocg("ocg1")
    page = document.new_page()
    box = pymupdf.Rect(50, 50, 400, 400)
    light_blue = pymupdf.utils.getColor("lightblue")
    # The value is not used afterwards; the call is kept so that the
    # colour lookup itself is still exercised by this test.
    red = pymupdf.utils.getColorHSV("red")

    insert_options = dict(
        align=pymupdf.TEXT_ALIGN_LEFT,
        fontsize=12,
        color=light_blue,
        oc=layer,
    )
    page.insert_textbox(box, text, **insert_options)

    # Containment check: clipping to the box must not drop any text.
    whole_page_text = page.get_text()
    assert whole_page_text == page.get_text(clip=box)
| 57 | |
| 58 | |
def test_textbox3():
    """Fill a rectangle through a TextWriter using Font("cjk"), right to left.

    After the containment check the document is scrubbed and its fonts are
    subset; both calls only need to complete without raising.
    """
    document = pymupdf.open()
    page = document.new_page()
    cjk_font = pymupdf.Font("cjk")
    box = pymupdf.Rect(50, 50, 400, 400)
    blue = (0, 0, 1)
    writer = pymupdf.TextWriter(page.rect, color=blue)

    fill_options = dict(
        align=pymupdf.TEXT_ALIGN_LEFT,
        font=cjk_font,
        fontsize=12,
        right_to_left=True,
    )
    writer.fill_textbox(box, text, **fill_options)
    writer.write_text(page, morph=(box.tl, pymupdf.Matrix(1, 1)))

    # Containment check: clipped and unclipped extraction must agree.
    whole_page_text = page.get_text()
    assert whole_page_text == page.get_text(clip=box)

    document.scrub()
    document.subset_fonts()
| 80 | |
| 81 | |
def test_textbox4():
    """Fill a rectangle through a TextWriter using Font("cour"), right to left.

    The text is written with an optional-content group attached, then
    checked for containment in the target rectangle.
    """
    document = pymupdf.open()
    layer = document.add_ocg("ocg1")
    page = document.new_page()
    box = pymupdf.Rect(50, 50, 400, 600)
    blue = (0, 0, 1)
    writer = pymupdf.TextWriter(page.rect, color=blue)

    font = pymupdf.Font("cour")
    writer.fill_textbox(
        box,
        text,
        align=pymupdf.TEXT_ALIGN_LEFT,
        fontsize=12,
        font=font,
        right_to_left=True,
    )

    morph = (box.tl, pymupdf.Matrix(1, 1))
    writer.write_text(page, oc=layer, morph=morph)

    # Containment check: clipped and unclipped extraction must agree.
    whole_page_text = page.get_text()
    assert whole_page_text == page.get_text(clip=box)
| 101 | |
| 102 | |
def test_textbox5():
    """Shrink the font size until justified text fits a small rectangle.

    The small-glyph-heights setting is switched on for the duration of the
    test and restored afterwards. The text is inserted repeatedly with a
    font size reduced by 0.5 per attempt until insert_textbox() returns a
    non-negative value; the bounding box of the first extracted text block
    must then lie inside the rectangle.
    """
    small_glyph_heights0 = pymupdf.TOOLS.set_small_glyph_heights()
    pymupdf.TOOLS.set_small_glyph_heights(True)
    try:
        doc = pymupdf.open()
        page = doc.new_page()
        r = pymupdf.Rect(100, 100, 150, 150)
        text = "words and words and words and more words..."
        rc = -1
        fontsize = 12
        page.draw_rect(r)
        while rc < 0:
            # Fail fast instead of retrying with zero or negative font
            # sizes if the text never fits.
            assert fontsize > 0, "text did not fit at any positive font size"
            rc = page.insert_textbox(
                r,
                text,
                fontsize=fontsize,
                align=pymupdf.TEXT_ALIGN_JUSTIFY,
            )
            fontsize -= 0.5

        blocks = page.get_text("blocks")
        bbox = pymupdf.Rect(blocks[0][:4])
        assert bbox in r
    finally:
        # Must restore small_glyph_heights, otherwise other tests can fail.
        pymupdf.TOOLS.set_small_glyph_heights(small_glyph_heights0)
| 130 | |
| 131 | |
def test_2637():
    """Ensure correct calculation of fitting text.

    Starting at font size 50 and decreasing by 1 per attempt, the text is
    inserted until insert_textbox() returns a non-negative value. The
    bounding box of the first extracted text block must then lie inside
    the target rectangle.
    """
    doc = pymupdf.open()
    page = doc.new_page()
    text = (
        "The morning sun painted the sky with hues of orange and pink. "
        "Birds chirped harmoniously, greeting the new day. "
        "Nature awakened, filling the air with life and promise."
    )
    rect = pymupdf.Rect(50, 50, 500, 280)
    fontsize = 50
    rc = -1
    while rc < 0:  # look for largest font size that makes the text fit
        # Fail fast instead of retrying with zero or negative font sizes
        # if the text never fits.
        assert fontsize > 0, "text did not fit at any positive font size"
        rc = page.insert_textbox(rect, text, fontname="hebo", fontsize=fontsize)
        fontsize -= 1

    # confirm text won't lap outside rect
    blocks = page.get_text("blocks")
    bbox = pymupdf.Rect(blocks[0][:4])
    assert bbox in rect
| 152 | |
| 153 | |
def test_htmlbox1():
    """Insert styled HTML text with four rotations and verify the result.

    One page is created per rotation (0, 90, 180, 270). On each page:
    - inserting with scale_low=1 must report a negative spare height and
      a scale of 1,
    - inserting with scale_low=0 must report spare height 0 and a scale
      strictly between 0 and 1,
    - the first link on the page must point to the expected URI,
    - the extracted plain text must equal the unstyled reference text,
    - every line must have the writing direction expected for the page,
    - the three specially styled words must carry the expected bold /
      italic / color properties and all other spans must be plain.

    Skipped (with a printed notice) when pymupdf has no "mupdf" attribute.
    """
    if not hasattr(pymupdf, "mupdf"):
        print("'test_htmlbox1' not executed in classic.")
        return

    # The first insertion below (scale_low=1) is asserted not to fit here.
    rect = pymupdf.Rect(100, 100, 200, 200)

    base_text = """Lorem ipsum dolor sit amet, consectetur adipisici elit, sed eiusmod tempor incidunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquid ex ea commodi consequat. Quis aute iure reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint obcaecat cupiditat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum."""

    text = """Lorem ipsum dolor sit amet, consectetur adipisici elit, sed eiusmod tempor incidunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation <b>ullamco</b> <i>laboris</i> nisi ut aliquid ex ea commodi consequat. Quis aute iure reprehenderit in <span style="color: #0f0;font-weight:bold;">voluptate</span> velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint obcaecat cupiditat non proident, sunt in culpa qui <a href="https://www.artifex.com">officia</a> deserunt mollit anim id est laborum."""

    # Expected writing directions, looked up by page number.
    wdirs = ((1, 0), (0, -1), (-1, 0), (0, 1))

    # Styled word -> (bold, italic, key into pymupdf.pdfcolor).
    styled_words = {
        "ullamco": (True, False, "black"),
        "laboris": (False, True, "black"),
        "voluptate": (True, False, "green"),
    }

    doc = pymupdf.Document()

    for rot in (0, 90, 180, 270):
        page = doc.new_page()

        # With scale_low=1 a negative spare height and scale 1 are expected.
        spare_height, scale = page.insert_htmlbox(rect, text, rotate=rot, scale_low=1)
        assert spare_height < 0
        assert scale == 1

        # With scale_low=0 spare height 0 and a scale below 1 are expected.
        spare_height, scale = page.insert_htmlbox(rect, text, rotate=rot, scale_low=0)
        assert spare_height == 0
        assert 0 < scale < 1

        page = doc.reload_page(page)
        first_link = page.get_links()[0]
        assert first_link["uri"] == "https://www.artifex.com"

        # Assert plain text is complete.
        # We must remove line breaks and any ligatures for this.
        plain = page.get_text(flags=0)[:-1].replace("\n", " ")
        assert base_text == plain

        encounters = 0  # counts the words with selected properties
        for block in page.get_text("dict")["blocks"]:
            for line in block["lines"]:
                wdir = line["dir"]  # writing direction
                assert wdir == wdirs[page.number]
                for span in line["spans"]:
                    stext = span["text"]
                    color = pymupdf.sRGB_to_pdf(span["color"])
                    bold = bool(span["flags"] & 16)
                    italic = bool(span["flags"] & 2)
                    if stext not in styled_words:
                        # Any other span must be neither bold nor italic.
                        assert bold is False
                        assert italic is False
                        continue
                    encounters += 1
                    want_bold, want_italic, color_name = styled_words[stext]
                    assert bold is want_bold
                    assert italic is want_italic
                    assert color == pymupdf.pdfcolor[color_name]

        # all 3 special words were encountered
        assert encounters == 3
| 225 | |
| 226 | |
def test_htmlbox2():
    """Insert a short HTML text without scaling for all four rotations.

    Each insertion must report scale 1 and a spare height strictly between
    0 and the rectangle height; the spare height must be the same for
    every rotation. Skipped (with a printed notice) when pymupdf has no
    "mupdf" attribute.
    """
    if not hasattr(pymupdf, "mupdf"):
        print("'test_htmlbox2' not executed in classic.")
        return

    document = pymupdf.open()
    box = pymupdf.Rect(100, 100, 200, 200)  # large enough to hold text
    page = document.new_page()

    distinct_spare_heights = set()
    for rotation in (0, 90, 180, 270):
        result = page.insert_htmlbox(
            box, "Hello, World!", scale_low=1, rotate=rotation
        )
        spare_height, scale = result
        assert scale == 1
        assert 0 < spare_height < box.height
        distinct_spare_heights.add(spare_height)

    # same result for all rotations
    assert len(distinct_spare_heights) == 1
| 245 | |
| 246 | |
def test_htmlbox3():
    """Insert HTML text with opacity=0.5 and read the opacity back.

    The first entry returned by page.get_texttrace() must report an
    "opacity" of 0.5. Skipped (with a printed notice) when pymupdf has no
    "mupdf" attribute.
    """
    if not hasattr(pymupdf, "mupdf"):
        print("'test_htmlbox3' not executed in classic.")
        return

    box = pymupdf.Rect(100, 250, 300, 350)
    html = """<span style="color:red;font-size:20px;">Just some text.</span>"""
    document = pymupdf.open()
    page = document.new_page()

    # insert some text with opacity
    page.insert_htmlbox(box, html, opacity=0.5)

    # lowlevel-extract inserted text to access opacity
    traces = page.get_texttrace()
    first_span = traces[0]
    assert first_span["opacity"] == 0.5
| 264 | |
| 265 | |
def test_3559():
    """Insert HTML whose body holds only an empty <h3> element.

    There are no assertions: the test passes when insert_htmlbox()
    returns without raising.
    """
    document = pymupdf.Document()
    page = document.new_page()
    html = """<body><h3></h3></body>"""
    box = pymupdf.Rect(100, 100, 200, 200)
    page.insert_htmlbox(box, html)
| 272 | |
| 273 | |
def test_3916():
    """Insert HTML into a 1x1 rectangle with scale_low=0.5.

    The reported spare height must be exactly -1.
    """
    document = pymupdf.open()
    tiny_box = pymupdf.Rect(100, 100, 101, 101)  # Too small for the text.
    page = document.new_page()
    result = page.insert_htmlbox(tiny_box, "Hello, World!", scale_low=0.5)
    # Unpacking also checks that exactly two values are returned.
    spare_height, scale = result
    assert spare_height == -1
| 280 | |
| 281 | |
def test_4400():
    """Call TextWriter.fill_textbox() with an explicit start position.

    The text is placed into a 100x20 rectangle starting at pos=(80, 0).
    There are no assertions: the test passes when fill_textbox() returns
    without raising. A progress message is printed and flushed right
    before the call.
    """
    with pymupdf.open() as document:
        page = document.new_page()
        writer = pymupdf.TextWriter(page.rect)
        text = '111111111'
        # Plain string (no placeholders) and a real boolean for flush;
        # the printed output is unchanged.
        print('Calling writer.fill_textbox().', flush=True)
        writer.fill_textbox(rect=pymupdf.Rect(0, 0, 100, 20), pos=(80, 0), text=text, fontsize=8)
