comparison tests/test_textbox.py @ 3:2c135c81b16c

MERGE: upstream PyMuPDF 1.26.4 with MuPDF 1.26.7
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:44:09 +0200
parents 1d09e1dec1d9
children a6bc019ac0b2
comparison
equal deleted inserted replaced
0:6015a75abc2d 3:2c135c81b16c
1 """
2 Fill a given text in a rectangle on some PDF page using
3 1. TextWriter object
4 2. Basic text output
5
6 Check text is indeed contained in given rectangle.
7 """
8 import pymupdf
9
# Sample prose shared by the text box tests below: four paragraphs that are
# separated from each other by one empty line.
# codespell:ignore-begin
text = "\n\n".join(
    (
        "Der Kleine Schwertwal (Pseudorca crassidens), auch bekannt als Unechter oder Schwarzer Schwertwal, ist eine Art der Delfine (Delphinidae) und der einzige rezente Vertreter der Gattung Pseudorca.",
        "Er ähnelt dem Orca in Form und Proportionen, ist aber einfarbig schwarz und mit einer Maximallänge von etwa sechs Metern deutlich kleiner.",
        "Kleine Schwertwale bilden Schulen von durchschnittlich zehn bis fünfzig Tieren, wobei sie sich auch mit anderen Delfinen vergesellschaften und sich meistens abseits der Küsten aufhalten.",
        "Sie sind in allen Ozeanen gemäßigter, subtropischer und tropischer Breiten beheimatet, sind jedoch vor allem in wärmeren Jahreszeiten auch bis in die gemäßigte bis subpolare Zone südlich der Südspitze Südamerikas, vor Nordeuropa und bis vor Kanada anzutreffen.",
    )
)
# codespell:ignore-end
19
def test_textbox1():
    """Fill a rectangle through a TextWriter and check text containment."""
    document = pymupdf.open()
    page = document.new_page()
    box = pymupdf.Rect(50, 50, 400, 400)

    # The writer covers the whole page and writes in blue.
    writer = pymupdf.TextWriter(page.rect, color=(0, 0, 1))
    writer.fill_textbox(box, text, align=pymupdf.TEXT_ALIGN_LEFT, fontsize=12)

    # Morph argument: the box's top-left point together with Matrix(1, 1).
    morph = (box.tl, pymupdf.Matrix(1, 1))
    writer.write_text(page, morph=morph)

    # Extracting the whole page must yield the same text as extracting
    # only what lies inside the box.
    whole_page = page.get_text()
    inside_box = page.get_text(clip=box)
    assert whole_page == inside_box

    # Finally write the same writer once more via Page.write_text().
    page.write_text(writers=writer)
37
38
def test_textbox2():
    """Fill a rectangle through Page.insert_textbox and check containment."""
    document = pymupdf.open()
    layer = document.add_ocg("ocg1")
    page = document.new_page()
    box = pymupdf.Rect(50, 50, 400, 400)

    text_color = pymupdf.utils.getColor("lightblue")
    # The HSV lookup result is not used afterwards; the call itself is kept
    # so that the function is still executed by this test.
    _red_hsv = pymupdf.utils.getColorHSV("red")

    page.insert_textbox(
        box,
        text,
        align=pymupdf.TEXT_ALIGN_LEFT,
        fontsize=12,
        color=text_color,
        oc=layer,
    )

    # Extracting the whole page must yield the same text as extracting
    # only what lies inside the box.
    whole_page = page.get_text()
    inside_box = page.get_text(clip=box)
    assert whole_page == inside_box
57
58
def test_textbox3():
    """Fill a rectangle through a TextWriter using the "cjk" font.

    The text is written with ``right_to_left=True``; afterwards the document
    is scrubbed and its fonts are subsetted.
    """
    document = pymupdf.open()
    page = document.new_page()
    cjk_font = pymupdf.Font("cjk")
    box = pymupdf.Rect(50, 50, 400, 400)

    # The writer covers the whole page and writes in blue.
    writer = pymupdf.TextWriter(page.rect, color=(0, 0, 1))
    writer.fill_textbox(
        box,
        text,
        align=pymupdf.TEXT_ALIGN_LEFT,
        font=cjk_font,
        fontsize=12,
        right_to_left=True,
    )

    # Morph argument: the box's top-left point together with Matrix(1, 1).
    morph = (box.tl, pymupdf.Matrix(1, 1))
    writer.write_text(page, morph=morph)

    # Extracting the whole page must yield the same text as extracting
    # only what lies inside the box.
    whole_page = page.get_text()
    inside_box = page.get_text(clip=box)
    assert whole_page == inside_box

    # Run the document-level clean-up calls on the result as well.
    document.scrub()
    document.subset_fonts()
80
81
def test_textbox4():
    """Fill a rectangle through a TextWriter using the "cour" font.

    The text is written with ``right_to_left=True`` and the writer output is
    attached to an optional content group.
    """
    document = pymupdf.open()
    layer = document.add_ocg("ocg1")
    page = document.new_page()
    box = pymupdf.Rect(50, 50, 400, 600)

    # The writer covers the whole page and writes in blue.
    writer = pymupdf.TextWriter(page.rect, color=(0, 0, 1))
    courier = pymupdf.Font("cour")
    writer.fill_textbox(
        box,
        text,
        align=pymupdf.TEXT_ALIGN_LEFT,
        fontsize=12,
        font=courier,
        right_to_left=True,
    )

    # Morph argument: the box's top-left point together with Matrix(1, 1).
    morph = (box.tl, pymupdf.Matrix(1, 1))
    writer.write_text(page, oc=layer, morph=morph)

    # Extracting the whole page must yield the same text as extracting
    # only what lies inside the box.
    whole_page = page.get_text()
    inside_box = page.get_text(clip=box)
    assert whole_page == inside_box
101
102
def test_textbox5():
    """Shrink the font size until justified text fits a small rectangle.

    The small-glyph-heights setting is switched on for the duration of the
    test and restored afterwards. Starting at font size 12, the text is
    inserted repeatedly with the size reduced by 0.5 per attempt until
    ``insert_textbox`` stops returning a negative value. The bounding box of
    the first extracted text block must then lie inside the rectangle.
    """
    # The argument-less call returns the value that is passed back in the
    # ``finally`` clause below.
    small_glyph_heights0 = pymupdf.TOOLS.set_small_glyph_heights()
    pymupdf.TOOLS.set_small_glyph_heights(True)
    try:
        doc = pymupdf.open()
        page = doc.new_page()
        r = pymupdf.Rect(100, 100, 150, 150)
        text = "words and words and words and more words..."
        rc = -1
        fontsize = 12
        page.draw_rect(r)
        while rc < 0:
            # Guard against an endless loop: if the text never fits, fail
            # clearly instead of retrying with zero or negative font sizes.
            assert fontsize > 0, "text did not fit at any positive font size"
            rc = page.insert_textbox(
                r,
                text,
                fontsize=fontsize,
                align=pymupdf.TEXT_ALIGN_JUSTIFY,
            )
            fontsize -= 0.5

        blocks = page.get_text("blocks")
        bbox = pymupdf.Rect(blocks[0][:4])
        assert bbox in r
    finally:
        # Must restore small_glyph_heights, otherwise other tests can fail.
        pymupdf.TOOLS.set_small_glyph_heights(small_glyph_heights0)
130
131
def test_2637():
    """Ensure correct calculation of fitting text.

    Starting at font size 50, the text is inserted repeatedly with the size
    reduced by 1 per attempt until ``insert_textbox`` stops returning a
    negative value. The bounding box of the first extracted text block must
    then lie inside the target rectangle.
    """
    doc = pymupdf.open()
    page = doc.new_page()
    text = (
        "The morning sun painted the sky with hues of orange and pink. "
        "Birds chirped harmoniously, greeting the new day. "
        "Nature awakened, filling the air with life and promise."
    )
    rect = pymupdf.Rect(50, 50, 500, 280)
    fontsize = 50
    rc = -1
    while rc < 0:  # look for largest font size that makes the text fit
        # Guard against an endless loop: if the text never fits, fail
        # clearly instead of retrying with zero or negative font sizes.
        assert fontsize > 0, "text did not fit at any positive font size"
        rc = page.insert_textbox(rect, text, fontname="hebo", fontsize=fontsize)
        fontsize -= 1

    # confirm text won't lap outside rect
    blocks = page.get_text("blocks")
    bbox = pymupdf.Rect(blocks[0][:4])
    assert bbox in rect
152
153
def test_htmlbox1():
    """Insert HTML-styled text with four rotations and verify the result.

    For each rotation a new page receives the HTML text twice: first with
    ``scale_low=1``, asserting a negative spare height and a scale of 1, then
    with ``scale_low=0``, asserting a spare height of 0 and a scale strictly
    between 0 and 1. Afterwards the page is reloaded and the test checks

    - the URI of the first link on the page,
    - that the extracted plain text equals the unstyled reference text,
    - the writing direction of every line,
    - bold / italic / color of the three specially styled words, and that
      every other span is neither bold nor italic.

    The test body is skipped when ``pymupdf`` has no ``mupdf`` attribute.
    """
    if not hasattr(pymupdf, "mupdf"):
        print("'test_htmlbox1' not executed in classic.")
        return

    rect = pymupdf.Rect(100, 100, 200, 200)  # this only works with scale=True

    # Reference text without any markup.
    expected_plain = """Lorem ipsum dolor sit amet, consectetur adipisici elit, sed eiusmod tempor incidunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquid ex ea commodi consequat. Quis aute iure reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint obcaecat cupiditat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum."""

    # The same text with bold, italic, colored and linked words.
    html = """Lorem ipsum dolor sit amet, consectetur adipisici elit, sed eiusmod tempor incidunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation <b>ullamco</b> <i>laboris</i> nisi ut aliquid ex ea commodi consequat. Quis aute iure reprehenderit in <span style="color: #0f0;font-weight:bold;">voluptate</span> velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint obcaecat cupiditat non proident, sunt in culpa qui <a href="https://www.artifex.com">officia</a> deserunt mollit anim id est laborum."""

    # Expected (bold, italic, color name) for each specially styled word.
    styled_words = {
        "ullamco": (True, False, "black"),
        "laboris": (False, True, "black"),
        "voluptate": (True, False, "green"),
    }

    # All writing directions, indexed by page number.
    wdirs = ((1, 0), (0, -1), (-1, 0), (0, 1))

    doc = pymupdf.Document()

    for rot in (0, 90, 180, 270):
        page = doc.new_page()

        # First attempt with scale_low=1.
        spare_height, scale = page.insert_htmlbox(rect, html, rotate=rot, scale_low=1)
        assert spare_height < 0
        assert scale == 1

        # Second attempt with scale_low=0.
        spare_height, scale = page.insert_htmlbox(rect, html, rotate=rot, scale_low=0)
        assert spare_height == 0
        assert 0 < scale < 1

        page = doc.reload_page(page)
        first_link = page.get_links()[0]  # extracts the links on the page
        assert first_link["uri"] == "https://www.artifex.com"

        # Assert plain text is complete.
        # We must remove line breaks and any ligatures for this.
        extracted = page.get_text(flags=0)[:-1].replace("\n", " ")
        assert expected_plain == extracted

        encounters = 0  # counts the words with selected properties
        for block in page.get_text("dict")["blocks"]:
            for line in block["lines"]:
                wdir = line["dir"]  # writing direction
                assert wdir == wdirs[page.number]
                for span in line["spans"]:
                    stext = span["text"]
                    color = pymupdf.sRGB_to_pdf(span["color"])
                    bold = bool(span["flags"] & 16)
                    italic = bool(span["flags"] & 2)
                    if stext in styled_words:
                        encounters += 1
                        want_bold, want_italic, color_name = styled_words[stext]
                        assert bold is want_bold
                        assert italic is want_italic
                        assert color == pymupdf.pdfcolor[color_name]
                    else:
                        assert bold is False
                        assert italic is False
        # all 3 special words were encountered
        assert encounters == 3
225
226
def test_htmlbox2():
    """Insert a short HTML text with ``scale_low=1`` under four rotations.

    Every insertion must report a scale of 1 and a spare height strictly
    between 0 and the rectangle height, and the spare height must be the
    same for all rotations. The test body is skipped when ``pymupdf`` has no
    ``mupdf`` attribute.
    """
    if not hasattr(pymupdf, "mupdf"):
        print("'test_htmlbox2' not executed in classic.")
        return

    document = pymupdf.open()
    box = pymupdf.Rect(100, 100, 200, 200)  # large enough to hold text
    page = document.new_page()

    spare_heights = set()
    for rotation in (0, 90, 180, 270):
        result = page.insert_htmlbox(
            box, "Hello, World!", scale_low=1, rotate=rotation
        )
        spare_height, scale = result
        assert scale == 1
        assert 0 < spare_height < box.height
        spare_heights.add(spare_height)

    # same result for all rotations
    assert len(spare_heights) == 1
245
246
def test_htmlbox3():
    """Insert HTML text with ``opacity=0.5`` and read the opacity back.

    The test body is skipped when ``pymupdf`` has no ``mupdf`` attribute.
    """
    if not hasattr(pymupdf, "mupdf"):
        print("'test_htmlbox3' not executed in classic.")
        return

    box = pymupdf.Rect(100, 250, 300, 350)
    html = """<span style="color:red;font-size:20px;">Just some text.</span>"""
    document = pymupdf.open()
    page = document.new_page()

    # Write the text half transparent.
    page.insert_htmlbox(box, html, opacity=0.5)

    # The text trace is used here because its entries carry an "opacity" key.
    first_span = page.get_texttrace()[0]
    assert first_span["opacity"] == 0.5
264
265
def test_3559():
    """Insert HTML whose body holds only an empty ``<h3>`` element.

    Nothing is asserted; the test passes when the insertion does not raise.
    """
    document = pymupdf.Document()
    page = document.new_page()
    html = """<body><h3></h3></body>"""
    box = pymupdf.Rect(100, 100, 200, 200)
    page.insert_htmlbox(box, html)
272
273
def test_3916():
    """Insert HTML into a 1x1 rectangle with ``scale_low=0.5``.

    The reported spare height must be -1.
    """
    document = pymupdf.open()
    tiny_box = pymupdf.Rect(100, 100, 101, 101)  # Too small for the text.
    page = document.new_page()
    result = page.insert_htmlbox(tiny_box, "Hello, World!", scale_low=0.5)
    spare_height, _scale = result
    assert spare_height == -1
280
281
def test_4400():
    """Call ``TextWriter.fill_textbox`` with an explicit start position.

    Nothing is asserted; the test passes when the call returns without
    raising. A progress line is printed and flushed right before the call.
    """
    with pymupdf.open() as document:
        page = document.new_page()
        writer = pymupdf.TextWriter(page.rect)
        digits = "111111111"
        box = pymupdf.Rect(0, 0, 100, 20)
        print("Calling writer.fill_textbox().", flush=True)
        writer.fill_textbox(rect=box, pos=(80, 0), text=digits, fontsize=8)