comparison tests/test_general.py @ 1:1d09e1dec1d9 upstream

ADD: PyMuPDF v1.26.4: the original sdist. It does not yet contain MuPDF. This normally will be downloaded when building PyMuPDF.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:37:51 +0200
parents
children a6bc019ac0b2
comparison
equal deleted inserted replaced
-1:000000000000 1:1d09e1dec1d9
1 # encoding utf-8
2 """
3 * Confirm sample doc has no links and no annots.
4 * Confirm proper release of file handles via Document.close()
5 * Confirm properly raising exceptions in document creation
6 """
7 import io
8 import os
9
10 import fnmatch
11 import json
12 import pymupdf
13 import pathlib
14 import pickle
15 import platform
16 import re
17 import shutil
18 import subprocess
19 import sys
20 import textwrap
21 import time
22 import util
23
24 import gentle_compare
25
# Absolute path of the directory that holds this test module.
scriptdir = os.path.dirname(os.path.abspath(__file__))
# Sample PDF used by the basic document-property tests.
filename = os.path.join(scriptdir, "resources", "001003ED.pdf")
28
29
def test_haslinks():
    """Confirm that the sample document reports no links."""
    # The context manager closes the document, so no handle outlives the test.
    with pymupdf.open(filename) as doc:
        assert not doc.has_links()
33
34
def test_hasannots():
    """Confirm that the sample document reports no annotations."""
    # The context manager closes the document, so no handle outlives the test.
    with pymupdf.open(filename) as doc:
        assert not doc.has_annots()
38
39
def test_haswidgets():
    """Confirm that the sample document is not reported as a form PDF."""
    # The context manager closes the document, so no handle outlives the test.
    with pymupdf.open(filename) as doc:
        assert not doc.is_form_pdf
43
44
def test_isrepaired():
    """Confirm that the sample document is not flagged as repaired."""
    # The context manager closes the document, so no handle outlives the test.
    with pymupdf.open(filename) as doc:
        assert not doc.is_repaired
        # Fetch the pending MuPDF warnings; presumably this also clears them so
        # they do not leak into later tests - TODO confirm against TOOLS docs.
        pymupdf.TOOLS.mupdf_warnings()
49
50
def test_isdirty():
    """Confirm that a freshly opened document is not marked dirty."""
    # The context manager closes the document, so no handle outlives the test.
    with pymupdf.open(filename) as doc:
        assert not doc.is_dirty
54
55
def test_cansaveincrementally():
    """Confirm that the sample document can be saved incrementally."""
    # The context manager closes the document, so no handle outlives the test.
    with pymupdf.open(filename) as doc:
        assert doc.can_save_incrementally()
59
60
def test_iswrapped():
    """Check Page.is_wrapped on the first page and the warnings MuPDF emitted."""
    doc = pymupdf.open(filename)
    first_page = doc[0]
    assert first_page.is_wrapped
    warnings = pymupdf.TOOLS.mupdf_warnings()
    if pymupdf.mupdf_version_tuple < (1, 26, 0):
        # Older MuPDF versions are expected to stay silent here.
        assert not warnings
    else:
        assert warnings == 'bogus font ascent/descent values (0 / 0)'
70
71
def test_wrapcontents():
    """Wrap, rewrite and clean page contents, then check the MuPDF warnings."""
    doc = pymupdf.open(filename)
    page = doc[0]
    page.wrap_contents()
    first_xref = page.get_contents()[0]
    stream = page.read_contents()
    # Write the page's content stream into its first /Contents object and make
    # that object the page's only content stream.
    doc.update_stream(first_xref, stream)
    page.set_contents(first_xref)
    assert len(page.get_contents()) == 1
    page.clean_contents()

    # The warning check only applies when the `pymupdf.mupdf` attribute exists.
    if not hasattr(pymupdf, 'mupdf'):
        return
    warnings = pymupdf.TOOLS.mupdf_warnings()
    expected = 'PDF stream Length incorrect'
    if pymupdf.mupdf_version_tuple >= (1, 26, 0):
        expected = 'bogus font ascent/descent values (0 / 0)\n' + expected
    assert warnings == expected
89
90
def test_page_clean_contents():
    """Assert that page contents cleaning actually is invoked."""
    doc = pymupdf.open()
    page = doc.new_page()

    # Each drawing call adds its own /Contents object, so two are expected.
    for rect in ((10, 10, 20, 20), (20, 20, 30, 30)):
        page.draw_rect(rect)
    assert len(page.get_contents()) == 2
    assert not page.read_contents().startswith(b"q")

    # Cleaning consolidates everything into one /Contents object whose stream
    # starts with the b"q" operator.
    page.clean_contents()
    assert len(page.get_contents()) == 1
    assert page.read_contents().startswith(b"q")
106
107
def test_annot_clean_contents():
    """Assert that annot contents cleaning actually is invoked."""
    page = pymupdf.open().new_page()
    highlight = page.add_highlight_annot((10, 10, 20, 20))

    # NOTE(review): the original comment states that the fresh appearance
    # stream does not start with b"q", but no assertion checks that.

    # Clean and reformat the appearance stream ...
    highlight.clean_contents()

    # ... after which it must start with the b"q" operator.
    appearance = highlight._getAP()
    assert appearance.startswith(b"q")
122
123
def test_config():
    """Check that the "py-memory" configuration entry is either True or False."""
    py_memory = pymupdf.TOOLS.fitz_config["py-memory"]
    assert py_memory in (True, False)
126
127
def test_glyphnames():
    """Round-trip a glyph name through its unicode value and back."""
    glyph_name = "INFINITY"
    code_point = pymupdf.glyph_name_to_unicode(glyph_name)
    round_tripped = pymupdf.unicode_to_glyph_name(code_point)
    assert round_tripped == glyph_name
132
133
def test_rgbcodes():
    """Convert the sRGB integer for white to PDF and RGB color triples."""
    white = 0xFFFFFF
    pdf_triple = pymupdf.sRGB_to_pdf(white)
    assert pdf_triple == (1, 1, 1)
    rgb_triple = pymupdf.sRGB_to_rgb(white)
    assert rgb_triple == (255, 255, 255)
138
139
def test_pdfstring():
    """Smoke-test the PDF string helpers with CJK and accented Latin text."""
    cjk_text = "Beijing, chinesisch 北京"
    latin_text = "Latin characters êßöäü"
    # Only the absence of exceptions is checked; return values are ignored.
    pymupdf.get_pdf_now()
    pymupdf.get_pdf_str(cjk_text)
    pymupdf.get_text_length(cjk_text, fontname="china-s")
    pymupdf.get_pdf_str(latin_text)
145
146
def test_open_exceptions():
    """Confirm how document creation handles wrong hints and bad input.

    * A wrong or unknown `filetype` for a PDF file must still yield a PDF.
    * A missing file must raise FileNotFoundError.
    * An empty stream must raise EmptyFileError.
    """
    path = os.path.normpath(f'{__file__}/../../tests/resources/001003ED.pdf')
    doc = pymupdf.open(path, filetype="xps")
    assert 'PDF' in doc.metadata["format"]

    doc = pymupdf.open(path, filetype="xxx")
    assert 'PDF' in doc.metadata["format"]

    try:
        pymupdf.open("x.y")
    except Exception as e:
        assert repr(e).startswith("FileNotFoundError")
    else:
        assert 0, 'Opening a missing file did not raise.'

    try:
        # Bind the result so the diagnostic below describes the document that
        # was unexpectedly opened, not the PDF opened earlier in this test.
        doc = pymupdf.open(stream=b"", filetype="pdf")
    except RuntimeError as e:
        assert repr(e).startswith("EmptyFileError"), f'{repr(e)=}'
    else:
        print(f'{doc.metadata["format"]=}')
        assert 0, 'Opening an empty stream did not raise.'
169
170
def test_bug1945():
    """Save the bug-1945 sample with clean=True into an in-memory buffer."""
    target = io.BytesIO()
    source = pymupdf.open(f'{scriptdir}/resources/bug1945.pdf')
    source.save(target, clean=True)
175
176
def test_bug1971():
    """Open, read drawings from and close the bug-1971 sample twice in a row."""
    path = f'{scriptdir}/resources/bug1971.pdf'
    for _round in range(2):
        doc = pymupdf.Document(path)
        first_page = next(doc.pages())
        first_page.get_drawings()
        doc.close()
        assert doc.is_closed
184
def test_default_font():
    """Check str() and repr() of a Font created without arguments."""
    font = pymupdf.Font()
    expected = "Font('Noto Serif Regular')"
    assert str(font) == expected
    assert repr(font) == expected
189
def test_add_ink_annot():
    """Add an ink annotation made of a cosine and a sine stroke, then save."""
    import math
    document = pymupdf.Document()
    page = document.new_page()

    # Two full periods, sampled every 15 degrees; x is the angle itself.
    angles = range(0, 360 * 2, 15)
    cosine_stroke = [(a, 300 + 200 * math.cos(a * math.pi / 180)) for a in angles]
    sine_stroke = [(a, 300 + 100 * math.sin(a * math.pi / 180)) for a in angles]

    page.add_ink_annot([cosine_stroke, sine_stroke])
    page.insert_text((100, 72), 'Hello world')
    page.add_text_annot((200, 200), "Some Text")
    page.get_bboxlog()
    path = f'{scriptdir}/resources/test_add_ink_annot.pdf'
    document.save(path)
    print(f'Have saved to: path={path!r}')
209
def test_techwriter_append():
    """Append text with a TextWriter and mark its text rect and last point."""
    print(pymupdf.__doc__)
    doc = pymupdf.open()
    page = doc.new_page()
    writer = pymupdf.TextWriter(page.rect)
    caption = "Red rectangle = TextWriter.text_rect, blue circle = .last_point"
    r = writer.append((100, 100), caption)
    print(f'r={r!r}')
    writer.write_text(page)
    # Visualise what the writer reports: its text rectangle and its end point.
    page.draw_rect(writer.text_rect, color=pymupdf.pdfcolor["red"])
    page.draw_circle(writer.last_point, 2, color=pymupdf.pdfcolor["blue"])
    path = f"{scriptdir}/resources/test_techwriter_append.pdf"
    doc.ez_save(path)
    print(f'Have saved to: {path}')
224
def test_opacity():
    """Create two overlapping circle annotations with opacity and blend mode."""
    doc = pymupdf.open()
    page = doc.new_page()

    # (rectangle, color used for both fill and stroke, opacity)
    circle_specs = (
        ((50, 50, 100, 100), (1, 0, 0), 2 / 3),
        ((75, 75, 125, 125), (0, 0, 1), 1 / 3),
    )
    annots = []  # keep every annotation referenced until the document is saved
    for rect, color, opacity in circle_specs:
        annot = page.add_circle_annot(rect)
        annot.set_colors(fill=color, stroke=color)
        annot.set_opacity(opacity)
        annot.update(blend_mode="Multiply")
        annots.append(annot)

    outfile = f'{scriptdir}/resources/opacity.pdf'
    doc.save(outfile, expand=True, pretty=True)
    print("saved", outfile)
241
def test_get_text_dict():
    """Check that "dict" text extraction yields JSON-serialisable blocks."""
    import json
    doc = pymupdf.open(f'{scriptdir}/resources/v110-changes.pdf')
    first_page = doc[0]
    blocks = first_page.get_text("dict")["blocks"]
    # json.dumps() raises if `blocks` contains any opaque (non-JSON) type.
    json.dumps(blocks, indent=4)
    warnings = pymupdf.TOOLS.mupdf_warnings()
    if pymupdf.mupdf_version_tuple < (1, 26, 0):
        assert not warnings
    else:
        assert warnings == 'bogus font ascent/descent values (0 / 0)'
254
def test_font():
    """Print the default font and the bounding box of its glyph 65."""
    default_font = pymupdf.Font()
    print(repr(default_font))
    bbox = default_font.glyph_bbox(65)
    print(f'bbox={bbox!r}')
260
def test_insert_font():
    """Call Page.insert_font() without arguments and print what it returns."""
    doc = pymupdf.open(f'{scriptdir}/resources/v110-changes.pdf')
    first_page = doc[0]
    i = first_page.insert_font()
    print(f'page.insert_font() => {i}')
266
def test_2173():
    """Create 100 small RGB pixmaps in a row; only absence of errors is checked."""
    from pymupdf import IRect, Pixmap, CS_RGB, Colorspace
    for _ in range(100):
        # A fresh colorspace and rectangle are built on every iteration.
        image = Pixmap(Colorspace(CS_RGB), IRect(0, 0, 13, 37))
    print('test_2173() finished')
273
def test_texttrace():
    """Time get_texttrace() over all pages, then dump each page's trace as JSON."""
    import json
    import time
    document = pymupdf.Document(f'{scriptdir}/resources/joined.pdf')

    # First pass: only measure how long the extraction takes.
    started = time.time()
    for page in document:
        page.get_texttrace()
    t = time.time() - started
    print(f'test_texttrace(): t={t!r}')

    # Second pass: write the extracted data to a file.
    path = f'{scriptdir}/resources/test_texttrace.txt'
    print(f'test_texttrace(): Writing to: {path}')
    with open(path, 'w') as out:
        for i, page in enumerate(document):
            trace_json = json.dumps(page.get_texttrace(), indent=" ")
            print(f'page {i} json:\n{trace_json}', file=out)
291
292
def test_2533():
    """Assert correct char bbox in page.get_texttrace().

    Search for a unique char on page and confirm that page.get_texttrace()
    returns the same bbox as the search method.
    """
    if hasattr(pymupdf, 'mupdf') and not pymupdf.g_use_extra:
        print('Not running test_2533() because rebased with use_extra=0 known to fail')
        return
    pymupdf.TOOLS.set_small_glyph_heights(True)
    try:
        doc = pymupdf.open(os.path.join(scriptdir, "resources", "test_2533.pdf"))
        page = doc[0]
        NEEDLE = "民"
        ord_NEEDLE = ord(NEEDLE)
        # Take the first matching char across ALL spans. A `break` inside the
        # nested loops would only leave the inner loop, and a missing needle
        # would surface as an unrelated NameError further down.
        bbox = next(
            (
                pymupdf.Rect(char[3])
                for span in page.get_texttrace()
                for char in span["chars"]
                if char[0] == ord_NEEDLE
            ),
            None,
        )
        assert bbox is not None, f'{NEEDLE!r} not found by page.get_texttrace().'
        bbox2 = page.search_for(NEEDLE)[0]
        assert bbox2 == bbox, f'{bbox=} {bbox2=} {bbox2-bbox=}.'
    finally:
        # Always restore the global setting so other tests are unaffected.
        pymupdf.TOOLS.set_small_glyph_heights(False)
317
318
def test_2645():
    """Assert same font size calculation in corner cases."""
    folder = os.path.join(scriptdir, "resources")
    for name in ("test_2645_1.pdf", "test_2645_2.pdf", "test_2645_3.pdf"):
        doc = pymupdf.open(os.path.join(folder, name))
        page = doc[0]
        # Font size of the first span, obtained in two independent ways.
        trace_size = page.get_texttrace()[0]["size"]
        text_dict = page.get_text("dict", flags=pymupdf.TEXTFLAGS_TEXT)
        first_span = text_dict["blocks"][0]["lines"][0]["spans"][0]
        dict_size = first_span["size"]
        assert abs(trace_size - dict_size) < 1e-5
332
333
def test_2506():
    """Ensure expected font size across text writing angles."""
    doc = pymupdf.open()
    page = doc.new_page()
    anchor = pymupdf.Point(100, 300)  # insertion point
    fontsize = 11
    text = "Hello"

    # Write the same text once for each angle.
    for angle in (0, 30, 60, 90, 120):
        morph = (anchor, pymupdf.Matrix(angle))
        page.insert_text(anchor, text, fontsize=fontsize, morph=morph)

    # get_texttrace() must report the font size, up to rounding.
    for trace_span in page.get_texttrace():
        print(trace_span["dir"])
        assert round(trace_span["size"]) == fontsize

    # get_text("dict") must report it as well, up to rounding.
    all_lines = (
        line
        for block in page.get_text("dict")["blocks"]
        for line in block["lines"]
    )
    for line in all_lines:
        print(line["dir"])
        for span in line["spans"]:
            print(span["size"])
            assert round(span["size"]) == fontsize
361
362
def test_2108():
    """Redact the "{sig}" placeholder and compare the remaining page text."""
    doc = pymupdf.open(f'{scriptdir}/resources/test_2108.pdf')
    page = doc[0]
    rect = page.search_for("{sig}")[0]
    page.add_redact_annot(rect)
    page.apply_redactions()
    text = page.get_text()

    # NOTE(review): the literal is reproduced exactly as shown in the listing;
    # confirm its whitespace against the repository copy.
    text_expected = b'Frau\nClaire Dunphy\nTeststra\xc3\x9fe 5\n12345 Stadt\nVertragsnummer: 12345\nSehr geehrte Frau Dunphy,\nText\nMit freundlichen Gr\xc3\xbc\xc3\x9fen\nTestfirma\nVertrag:\n 12345\nAnsprechpartner:\nJay Pritchet\nTelefon:\n123456\nE-Mail:\ntest@test.de\nDatum:\n07.12.2022\n'.decode('utf8')

    text_utf8 = text.encode("utf8")
    expected_utf8 = text_expected.encode("utf8")

    # Verbose info, always printed.
    print(f'test_2108(): text is:\n{text}')
    print('')
    print(f'test_2108(): repr(text) is:\n{text!r}')
    print('')
    print(f'test_2108(): repr(text.encode("utf8")) is:\n{text_utf8!r}')
    print('')
    print(f'test_2108(): text_expected is:\n{text_expected}')
    print('')
    print(f'test_2108(): repr(text_expected) is:\n{text_expected!r}')
    print('')
    print(f'test_2108(): repr(text_expected.encode("utf8")) is:\n{expected_utf8!r}')

    # Three views of the same comparison, reported for diagnostics only.
    ok1 = (text == text_expected)
    ok2 = (text_utf8 == expected_utf8)
    ok3 = (repr(text_utf8) == repr(expected_utf8))

    print('')
    print(f'ok1={ok1}')
    print(f'ok2={ok2}')
    print(f'ok3={ok3}')

    print('')

    print(f'{pymupdf.mupdf_version_tuple=}')
    if pymupdf.mupdf_version_tuple < (1, 21, 2):
        print('Asserting text!=text_expected')
        assert text != text_expected
    else:
        print('Asserting text==text_expected')
        assert text == text_expected
406
407
def test_2238():
    """Open a PDF that needs repair; check the warnings and the page texts."""
    doc = pymupdf.open(f'{scriptdir}/resources/test2238.pdf')
    if hasattr(pymupdf, 'mupdf'):
        wt = pymupdf.TOOLS.mupdf_warnings()
        # The leading warning lines depend on the MuPDF version; the two
        # repair messages are expected in either case.
        if pymupdf.mupdf_version_tuple >= (1, 26):
            expected_lines = [
                'garbage bytes before version marker',
                "syntax error: expected 'obj' keyword (6 0 ?)",
            ]
        else:
            expected_lines = ['format error: cannot recognize version marker']
        expected_lines += ['trying to repair broken xref', 'repairing PDF document']
        assert wt == '\n'.join(expected_lines), f'{wt=}'

    def check_first_and_last(**get_text_kwargs):
        """Extract first and last page text and compare with the known content."""
        first_page = doc.load_page(0).get_text('text', **get_text_kwargs)
        last_page = doc.load_page(-1).get_text('text', **get_text_kwargs)
        print(f'first_page={first_page!r}')
        print(f'last_page={last_page!r}')
        assert first_page == 'Hello World\n'
        assert last_page == 'Hello World\n'

    # Once with an explicit infinite clip rectangle, once with the default.
    check_first_and_last(clip=pymupdf.INFINITE_RECT())
    check_first_and_last()
438
439
def test_2093():
    """Applying a redaction must leave the page's average color almost unchanged."""
    if platform.python_implementation() == 'GraalVM':
        print(f'test_2093(): Not running because slow on GraalVM.')
        return

    def average_color(page):
        """Return the per-component mean over all pixels of the rendered page."""
        pixmap = page.get_pixmap()
        components = range(pixmap.n)
        totals = [0] * pixmap.n
        for y in range(pixmap.height):
            for x in range(pixmap.width):
                pixel = pixmap.pixel(x, y)
                for i in components:
                    totals[i] += pixel[i]
        pixel_count = pixmap.height * pixmap.width
        return [total / pixel_count for total in totals]

    doc = pymupdf.open(f'{scriptdir}/resources/test2093.pdf')
    page = doc.load_page(0)
    pixel_average_before = average_color(page)

    # Redaction rectangle given as origin plus width / height.
    rx, ry = 135.123, 123.56878
    rw, rh = 69.8409, 9.46397
    rect = pymupdf.Rect(rx, ry, rx + rw, ry + rh)

    font = pymupdf.Font("Helvetica")
    page.add_redact_annot(
        quad=rect,
        #text="null",
        fontname=font.name,
        fontsize=12,
        align=pymupdf.TEXT_ALIGN_CENTER,
        fill=(0, 0, 0),
        text_color=(1, 1, 1),
    )

    page.apply_redactions()
    pixel_average_after = average_color(page)

    print(f'pixel_average_before={pixel_average_before!r}')
    print(f'pixel_average_after={pixel_average_after!r}')

    # Before this bug was fixed (MuPDF-1.22):
    #   pixel_average_before=[130.864323120088, 115.23577810900859, 92.9268559996174]
    #   pixel_average_after=[138.68844553555772, 123.05687162237561, 100.74275056194105]
    # After fix:
    #   pixel_average_before=[130.864323120088, 115.23577810900859, 92.9268559996174]
    #   pixel_average_after=[130.8889209934799, 115.25722751837269, 92.94327384463327]
    for i in range(len(pixel_average_before)):
        assert abs(pixel_average_before[i] - pixel_average_after[i]) < 0.1

    out = f'{scriptdir}/resources/test2093-out.pdf'
    doc.save(out)
    print(f'Have written to: {out}')
506
507
def test_2182():
    """Print every annotation found on the first page of the sample."""
    print(f'test_2182() started')
    doc = pymupdf.open(f'{scriptdir}/resources/test2182.pdf')
    first_page = doc[0]
    for annotation in first_page.annots():
        print(annotation)
    print(f'test_2182() finished')
515
516
def test_2246():
    """
    Test / confirm identical text positions generated by
    * page.insert_text()
    versus
    * TextWriter.write_text()

    ... under varying situations as follows:

    1. MediaBox does not start at (0, 0)
    2. CropBox origin is different from that of MediaBox
    3. Check for all 4 possible page rotations

    The test writes the same text at the same positions using `page.insert_text()`,
    respectively `TextWriter.write_text()`.
    Then extracts the text spans and confirms that they all occupy the same bbox.
    This ensures coincidence of text positions of page.insert_text()
    (which is assumed correct) and TextWriter.write_text().
    """
    def bbox_count(rot):
        """Make a page and insert identical text via different methods.

        Desired page rotation is a parameter. MediaBox and CropBox are chosen
        to be "awkward": MediaBox does not start at (0,0) and CropBox is a
        true subset of MediaBox.

        Returns the number of distinct span bboxes found on the page.
        """
        # bboxes of spans on page: same text positions are represented by ONE bbox
        bboxes = set()
        doc = pymupdf.open()
        # prepare a page with desired MediaBox / CropBox peculiarities
        mediabox = pymupdf.paper_rect("letter")
        page = doc.new_page(width=mediabox.width, height=mediabox.height)
        xref = page.xref
        # parse the "[x0 y0 x1 y1]" value of the page's /MediaBox entry
        newmbox = list(map(float, doc.xref_get_key(xref, "MediaBox")[1][1:-1].split()))
        newmbox = pymupdf.Rect(newmbox)
        mbox = newmbox + (10, 20, 10, 20)
        cbox = mbox + (10, 10, -10, -10)
        doc.xref_set_key(xref, "MediaBox", "[%g %g %g %g]" % tuple(mbox))
        # The key must be spelled "CropBox": the former "CrobBox" only added an
        # unknown entry, so the CropBox condition described above was never
        # actually set up.
        doc.xref_set_key(xref, "CropBox", "[%g %g %g %g]" % tuple(cbox))
        # set page to desired rotation
        page.set_rotation(rot)
        page.insert_text((50, 50), "Text inserted at (50,50)")
        tw = pymupdf.TextWriter(page.rect)
        tw.append((50, 50), "Text inserted at (50,50)")
        tw.write_text(page)
        blocks = page.get_text("dict")["blocks"]
        for b in blocks:
            for l in b["lines"]:
                for s in l["spans"]:
                    # store bbox rounded to 3 decimal places
                    bboxes.add(pymupdf.Rect(pymupdf.JM_TUPLE3(s["bbox"])))
        return len(bboxes)  # should be 1!

    # the following tests must all pass
    for rotation in (0, 90, 180, 270):
        assert bbox_count(rotation) == 1
575
576
def test_2430():
    """Confirm that multiple font property checks will not destroy Py_None."""
    helvetica = pymupdf.Font("helv")
    # Read the property many times; the value itself is irrelevant.
    for _ in range(1000):
        helvetica.flags
582
def test_2692():
    """Render a small clip of each page directly and through its display list."""
    document = pymupdf.Document(f'{scriptdir}/resources/2.pdf')
    for page in document:
        pix = page.get_pixmap(clip=pymupdf.Rect(0, 0, 10, 10))
        display_list = page.get_displaylist(annots=True)
        # Once with an explicit RGB colorspace ...
        pix = display_list.get_pixmap(
            matrix=pymupdf.Identity,
            colorspace=pymupdf.csRGB,
            alpha=False,
            clip=pymupdf.Rect(0, 0, 10, 10),
        )
        # ... and once relying on the default colorspace.
        pix = display_list.get_pixmap(
            matrix=pymupdf.Identity,
            alpha=False,
            clip=pymupdf.Rect(0, 0, 10, 10),
        )
600
601
def test_2596():
    """Confirm correctly abandoning cache when reloading a page."""
    if platform.python_implementation() == 'GraalVM':
        print(f'test_2596(): not running on Graal.')
        return
    doc = pymupdf.Document(f"{scriptdir}/resources/test_2596.pdf")
    page = doc[0]
    pix0 = page.get_pixmap()  # render the page
    _ = doc.tobytes(garbage=3)  # save with garbage collection

    # Note this will invalidate cache content for this page.
    # Reloading the page now empties the cache, so rendering
    # will deliver the same pixmap
    page = doc.reload_page(page)
    pix1 = page.get_pixmap()
    assert pix1.samples == pix0.samples
    # The warning is only expected with MuPDF versions before 1.26.6.
    if pymupdf.mupdf_version_tuple < (1, 26, 6):
        wt = pymupdf.TOOLS.mupdf_warnings()
        assert wt == 'too many indirections (possible indirection cycle involving 24 0 R)'
622
623
def test_2730():
    """Check that three extraction variants yield the same set of characters."""
    doc = pymupdf.open(f"{scriptdir}/resources/test_2730.pdf")
    page = doc[0]
    plain_chars = set(page.get_text())  # plain text extraction
    sorted_chars = set(page.get_text(sort=True))  # uses "blocks" extraction
    textbox_chars = set(page.get_textbox(page.rect))
    assert plain_chars == sorted_chars
    assert plain_chars == textbox_chars
633
634
def test_2553():
    """Ensure identical character sets across text extractions.

    Also checks that the page text contains no U+FFFD replacement characters.
    """
    verbose = 0
    doc = pymupdf.open(f"{scriptdir}/resources/test_2553.pdf")
    page = doc[0]

    # extract plain text, build set of all characters
    list1 = page.get_text()
    set1 = set(list1)

    # extract text blocks, build set of all characters
    list2 = page.get_text(sort=True)  # internally uses "blocks"
    set2 = set(list2)

    # extract textbox content, build set of all characters
    list3 = page.get_textbox(page.rect)
    set3 = set(list3)

    def show(l):
        """Return a printable rendering of `l`, escaping non-ASCII characters."""
        parts = [f'len={len(l)}\n']
        for c in l:
            cc = ord(c)
            if 32 <= cc < 127 or c == '\n':
                parts.append(c)
            else:
                # hex() already includes the "0x" prefix; do not add another.
                parts.append(f' [{hex(cc)}]')
        return ''.join(parts)

    if verbose:
        print(f'list1:\n{show(list1)}')
        print(f'list2:\n{show(list2)}')
        print(f'list3:\n{show(list3)}')

    # all sets must be equal
    assert set1 == set2
    assert set1 == set3

    # With mupdf later than 1.23.4, this special page contains no invalid
    # Unicodes.
    print(f'Checking no occurrence of 0xFFFD, {pymupdf.mupdf_version_tuple=}.')
    assert chr(0xFFFD) not in set1
677
def test_2553_2():
    """Plain text of the sample's first page must not contain U+FFFD."""
    doc = pymupdf.open(f"{scriptdir}/resources/test_2553-2.pdf")
    first_page = doc[0]
    plain_text = first_page.get_text()
    replacement_char = chr(0xfffd)
    assert replacement_char not in plain_text
685
def test_2635():
    """Rendering a page before and after cleaning it should yield the same pixmap."""
    doc = pymupdf.open(f"{scriptdir}/resources/test_2635.pdf")
    page = doc[0]
    samples_before = page.get_pixmap().samples  # rendered before cleaning
    page.clean_contents()
    samples_after = page.get_pixmap().samples  # rendered after cleaning
    assert samples_before == samples_after
695
696
def test_resolve_names():
    """Test PDF name resolution against a pickled reference result."""
    # guard against wrong PyMuPDF architecture version
    if not hasattr(pymupdf.Document, "resolve_names"):
        print("PyMuPDF version does not support resolving PDF names")
        return
    # Close the reference file deterministically instead of leaking the handle.
    # The pickle is a fixture shipped with the tests, not external input.
    with open(f"{scriptdir}/resources/cython.pickle", "rb") as pickle_in:
        old_names = pickle.load(pickle_in)
    with pymupdf.open(f"{scriptdir}/resources/cython.pdf") as doc:
        new_names = doc.resolve_names()
    assert new_names == old_names
708
def test_2777():
    """Print the MediaBox width of a page in a newly created document."""
    new_page = pymupdf.Document().new_page()
    print(new_page.mediabox.width)
713
def test_2710():
    """Check text blocks, CropBox and MediaBox of the issue-2710 sample page."""
    doc = pymupdf.open(f'{scriptdir}/resources/test_2710.pdf')
    page = doc.load_page(0)

    print(f'test_2710(): {page.cropbox=}')
    print(f'test_2710(): {page.mediabox=}')
    print(f'test_2710(): {page.rect=}')

    def assert_rects_approx_eq(a, b):
        """Fail unless the two rectangles agree to within 0.001 per coordinate."""
        corner_pairs = (
            (a.bottom_left, b.bottom_left),
            (a.top_right, b.top_right),
        )
        nearly_equal = all(
            abs(p.x - q.x) < 0.001 and abs(p.y - q.y) < 0.001
            for p, q in corner_pairs
        )
        assert nearly_equal, f'Not nearly identical: {a=} {b=}'

    blocks = page.get_text('blocks')
    print(f'test_2710(): {blocks=}')
    assert len(blocks) == 2
    block = blocks[1]
    rect = pymupdf.Rect(block[:4])  # first four items are the block's bbox
    text = block[4]
    print(f'test_2710(): {rect=}')
    print(f'test_2710(): {text=}')
    assert text == 'Text at left page border\n'

    assert_rects_approx_eq(page.cropbox, pymupdf.Rect(30.0, 30.0, 565.3200073242188, 811.9199829101562))
    assert_rects_approx_eq(page.mediabox, pymupdf.Rect(0.0, 0.0, 595.3200073242188, 841.9199829101562))
    print(f'test_2710(): {pymupdf.mupdf_version_tuple=}')
    # 2023-11-05: Currently broken in mupdf master.
    print(f'test_2710(): Not Checking page.rect and rect.')
    if hasattr(pymupdf, 'mupdf'):
        wt = pymupdf.TOOLS.mupdf_warnings()
        expected_warnings = (
            "syntax error: cannot find ExtGState resource 'GS7'\n"
            "syntax error: cannot find ExtGState resource 'GS8'\n"
            "encountered syntax errors; page may not be correct"
        )
        assert wt == expected_warnings
754
755
def test_2736():
    """Check handling of CropBox changes vis-a-vis a MediaBox with
    negative coordinates."""
    doc = pymupdf.open()
    page = doc.new_page()

    # Fake a MediaBox with negative coordinates for demo purposes.
    doc.xref_set_key(page.xref, "MediaBox", "[-30 -20 595 842]")

    assert page.cropbox == pymupdf.Rect(-30, 0, 595, 862)
    assert page.rect == pymupdf.Rect(0, 0, 625, 862)

    # Shift the CropBox by (10, 10). To achieve this, 10 must be subtracted
    # from 862, because y0 must never be negative.
    page.set_cropbox(pymupdf.Rect(-20, 0, 595, 852))

    # Read the CropBox back from the page definition.
    stored_cropbox = doc.xref_get_key(page.xref, "CropBox")[1]
    assert stored_cropbox == "[-20 -10 595 842]"
    assert page.rect == pymupdf.Rect(0, 0, 615, 852)

    # A CropBox reaching outside the MediaBox must be rejected.
    caught = None
    try:
        page.set_cropbox((-35, -10, 595, 842))
    except Exception as exc:
        caught = exc
    assert caught is not None
    assert str(caught) == "CropBox not in MediaBox"
785
786
def test_subset_fonts():
    """Confirm subset_fonts is working."""
    if not hasattr(pymupdf, "mupdf"):
        print("Not testing 'test_subset_fonts' in classic.")
        return
    archive = pymupdf.Archive()
    css = pymupdf.css_for_pymupdf_font("ubuntu", archive=archive)
    css += "* {font-family: ubuntu;}"
    doc = pymupdf.open()
    page = doc.new_page()
    page.insert_htmlbox(page.rect, "Just some arbitrary text.", css=css, archive=archive)
    doc.subset_fonts(verbose=True)
    # Some object of the file must now mention the subset-prefixed font name.
    found = any(
        "+Ubuntu#20Regular" in doc.xref_object(xref)
        for xref in range(1, doc.xref_length())
    )
    assert found is True
806
807
def test_2957_1():
    """Text following a redaction must not change coordinates."""
    needle = "6e9f73dfb4384a2b8af6ebba"  # string that must not move

    def hits_sorted_by_y1(pg):
        """Return the needle's hit rectangles on `pg`, ordered by their y1."""
        return sorted(pg.search_for(needle), key=lambda r: r.y1)

    # test file with redactions
    doc = pymupdf.open(os.path.join(scriptdir, "resources", "test_2957_1.pdf"))
    page = doc[0]
    rects0 = hits_sorted_by_y1(page)
    assert len(rects0) == 2  # the needle must occur exactly twice
    page.apply_redactions()

    # reload page to finalize updates
    page = doc.reload_page(page)
    rects1 = hits_sorted_by_y1(page)

    assert page.first_annot is None  # make sure annotations have disappeared
    # Both hits must keep their positions; compare rounded rectangles to
    # forgive rounding errors.
    for idx in (0, 1):
        assert rects0[idx].irect == rects1[idx].irect
832
833
def test_2957_2():
    """Redacted text must not change positions of remaining text."""
    doc = pymupdf.open(os.path.join(scriptdir, "resources", "test_2957_2.pdf"))
    page = doc[0]
    words_before = page.get_text("words")
    page.apply_redactions()  # removes the word "longer"
    words_after = page.get_text("words")
    assert len(words_after) == len(words_before) - 1  # exactly one word less
    assert words_before[3][4] == "longer"  # confirms the right test file
    del words_before[3]  # drop the redacted word so both lists line up
    for idx, after in enumerate(words_after):
        before = words_before[idx]
        bbox_after = pymupdf.Rect(after[:4]).irect
        bbox_before = pymupdf.Rect(before[:4]).irect
        assert bbox_before == bbox_after  # rounded coordinates must match
850
851
def test_707560():
    """https://bugs.ghostscript.com/show_bug.cgi?id=707560
    Ensure that redactions also remove characters with an empty width bbox.
    """
    # Text in many scripts, intended to contain characters with an empty bbox.
    greetings = (
        "Hello, World!",  # english
        "Hallo, Welt!",  # german
        "سلام دنیا!",  # persian
        "வணக்கம், உலகம்!",  # tamil
        "สวัสดีชาวโลก!",  # thai
        "Привіт Світ!",  # ukrainian
        "שלום עולם!",  # hebrew
        "ওহে বিশ্ব!",  # bengali
        "你好世界!",  # chinese
        "こんにちは世界!",  # japanese
        "안녕하세요, 월드!",  # korean
        "नमस्कार, विश्व !",  # sanskrit
        "हैलो वर्ल्ड!",  # hindi
    )
    story = pymupdf.Story(" ... ".join(greetings))
    where = (50, 50, 400, 500)

    # Lay the story out into an in-memory PDF, one page per loop pass, for as
    # long as story.place() reports that more content remains.
    buffer = io.BytesIO()
    writer = pymupdf.DocumentWriter(buffer)
    more = True
    while more:
        device = writer.begin_page(pymupdf.paper_rect("a4"))
        more, _ = story.place(where)
        story.draw(device)
        writer.end_page()
    writer.close()

    doc = pymupdf.open("pdf", buffer)
    page = doc[0]
    assert page.get_text(), "Unexpected: test page has no text."
    # Redact the whole page; afterwards no text may be left.
    page.add_redact_annot(page.rect)
    page.apply_redactions()
    assert not page.get_text(), "Unexpected: text not fully redacted."
892
893
def test_3070():
    """Change the URI of the first link on page 0 and save the document."""
    source = os.path.abspath(f'{__file__}/../../tests/resources/test_3070.pdf')
    target = os.path.abspath(f'{__file__}/../../tests/test_3070_out.pdf')
    with pymupdf.open(source) as pdf:
        first_link = pdf[0].get_links()[0]
        first_link['uri'] = "https://www.ddg.gg"
        pdf[0].update_link(first_link)
        pdf.save(target)
900
def test_bboxlog_2885():
    """get_bboxlog() must emit the same warning with and without layers=True."""
    doc = pymupdf.open(os.path.abspath(f'{__file__}/../../tests/resources/test_2885.pdf'))
    page = doc[0]
    for kwargs in ({}, {'layers': True}):
        bbl = page.get_bboxlog(**kwargs)
        wt = pymupdf.TOOLS.mupdf_warnings()
        assert wt == 'invalid marked content and clip nesting'
912
def test_3081():
    '''
    Check Document.close() closes file handles, even if a Page instance exists.
    '''
    import shutil
    import sys
    import traceback

    path1 = os.path.abspath(f'{__file__}/../../tests/resources/1.pdf')
    path2 = os.path.abspath(f'{__file__}/../../tests/test_3081-2.pdf')
    rebased = hasattr(pymupdf, 'mupdf')
    shutil.copy2(path1, path2)

    # Find next two available fds.
    next_fd_1 = os.open(path2, os.O_RDONLY)
    next_fd_2 = os.open(path2, os.O_RDONLY)
    os.close(next_fd_1)
    os.close(next_fd_2)

    def next_fd():
        '''Open and immediately close the file; return the fd number it got.'''
        probe = os.open(path2, os.O_RDONLY)
        os.close(probe)
        return probe

    # The probes are order-sensitive: each one records which fd number was
    # handed out at that point of the document's life cycle.
    fd1 = next_fd()
    document = pymupdf.open(path2)
    page = document[0]
    fd2 = next_fd()
    document.close()
    if rebased:
        assert document.this is None
        assert page.this is None

    # Using the closed document must fail.
    try:
        document.page_count()
    except Exception as exc:
        print(f'Received expected exception: {exc}')
        #traceback.print_exc(file=sys.stdout)
        assert str(exc) == 'document closed'
    else:
        assert 0, 'Did not receive expected exception.'
    fd3 = next_fd()

    # Using a page of the closed document must fail as well.
    try:
        page.bound()
    except Exception as exc:
        print(f'Received expected exception: {exc}')
        #traceback.print_exc(file=sys.stdout)
        expected_message = 'page is None' if rebased else 'orphaned object: parent is None'
        assert str(exc) == expected_message
    else:
        assert 0, 'Did not receive expected exception.'
    page = None
    fd4 = next_fd()

    print(f'{next_fd_1=} {next_fd_2=}')
    print(f'{fd1=} {fd2=} {fd3=} {fd4=}')
    print(f'{document=}')
    assert fd1 == next_fd_1
    assert fd2 == next_fd_2  # Checks document only uses one fd.
    assert fd3 == next_fd_1  # Checks no leaked fds after document close.
    assert fd4 == next_fd_1  # Checks no leaked fds after failed page access.
975
def test_xml():
    """Read the XML metadata of a sample document; only errors would fail."""
    sample = os.path.abspath(f'{__file__}/../../tests/resources/2.pdf')
    with pymupdf.open(sample) as doc:
        doc.get_xml_metadata()
980
def test_3112_set_xml_metadata():
    """Set XML metadata on a new, empty document; only errors would fail."""
    empty_doc = pymupdf.Document()
    empty_doc.set_xml_metadata('hello world')
984
def test_archive_3126():
    '''
    Checks that pymupdf.Archive() accepts a pathlib.Path pointing at the
    tests/resources directory.
    '''
    if not hasattr(pymupdf, 'mupdf'):
        print(f'Not running because known to fail with classic.')
        return
    resources_dir = pathlib.Path(
            os.path.abspath(f'{__file__}/../../tests/resources')
            )
    archive = pymupdf.Archive(resources_dir)
992
def test_3140():
    '''
    Opens a copy of resources/2.pdf, fills a 3x5 table on page 0 with html
    boxes via Page.insert_htmlbox(), saves the result, and then checks that
    the next available file descriptor number is the same as before the
    document was opened.
    '''
    if not hasattr(pymupdf, 'mupdf'):
        print(f'Not running test_3140 on classic, because Page.insert_htmlbox() not available.')
        return
    css2 = ''
    path = os.path.abspath(f'{__file__}/../../tests/resources/2.pdf')
    oldfile = os.path.abspath(f'{__file__}/../../tests/test_3140_old.pdf')
    newfile = os.path.abspath(f'{__file__}/../../tests/test_3140_new.pdf')
    import shutil
    # Work on a copy so that the resource file itself is never opened by
    # pymupdf.
    shutil.copy2(path, oldfile)
    def next_fd():
        # Opens and immediately closes `path`, returning the fd number that
        # the OS handed out.
        fd = os.open(path, os.O_RDONLY)
        os.close(fd)
        return fd
    fd1 = next_fd()
    with pymupdf.open(oldfile) as doc:  # open document
        page = doc[0]
        rect = pymupdf.Rect(130, 400, 430, 600)
        CELLS = pymupdf.make_table(rect, cols=3, rows=5)
        shape = page.new_shape()  # create Shape
        for i in range(5):
            for j in range(3):
                qtext = "<b>" + "Ques #" + str(i*3+j+1) + ": " + "</b>" # codespell:ignore
                atext = "<b>" + "Ans:" + "</b>" # codespell:ignore
                qtext = qtext + '<br>' + atext
                shape.draw_rect(CELLS[i][j])  # draw rectangle
                page.insert_htmlbox(CELLS[i][j], qtext, css=css2, scale_low=0)
            # NOTE(review): nesting level of this call was reconstructed;
            # confirm it belongs to the outer loop.
            shape.finish(width=2.5, color=pymupdf.pdfcolor["blue"], )
        shape.commit()  # write all stuff to the page
        doc.subset_fonts()
        doc.ez_save(newfile)
    fd2 = next_fd()
    # If an fd had been left open by the document, the next free fd number
    # would have changed.
    assert fd2 == fd1, f'{fd1=} {fd2=}'
    os.remove(oldfile)
1027
def test_cli():
    '''
    Checks that running the shell command `pymupdf -h` exits successfully.
    '''
    have_rebased = hasattr(pymupdf, 'mupdf')
    if not have_rebased:
        print('test_cli(): Not running on classic because of fitz_old.')
        return
    import subprocess
    command = 'pymupdf -h'
    # check=True makes a non-zero exit status raise.
    subprocess.run(command, shell=True, check=True)
1034
1035
def check_lines(expected_regexes, actual):
    '''
    Checks lines in <actual> match regexes in <expected_regexes>.

    Args:
        expected_regexes:
            Either a string (split into lines on newlines) or a sequence of
            regex strings, one per expected line. `None` items are ignored.
            Each regex has to match an entire line. A sequence passed in is
            never modified.
        actual:
            Either a string (split into lines on newlines) or a sequence of
            lines.

    If the last (non-None) regex is not empty, an additional empty final line
    is expected, i.e. <actual> is expected to end with a newline.

    Raises AssertionError if a line does not match its regex or if the number
    of lines differs.
    '''
    print(f'check_lines():', flush=1)
    print(f'{expected_regexes=}', flush=1)
    print(f'{actual=}', flush=1)
    def str_to_list(s):
        if isinstance(s, str):
            return s.split('\n') if s else list()
        return s
    # Remove `None` regexes first, so that a trailing `None` cannot hide the
    # real last regex from the check below. Building a new list here also
    # ensures that we never modify the caller's sequence.
    expected_regexes = [i for i in str_to_list(expected_regexes) if i is not None]
    actual = str_to_list(actual)
    if expected_regexes and expected_regexes[-1]:
        expected_regexes.append('') # Always expect a trailing empty line.
    # Make all regexes match entire lines.
    expected_regexes = [f'^{i}$' for i in expected_regexes]
    print(f'{expected_regexes=}', flush=1)
    for expected_regex_line, actual_line in zip(expected_regexes, actual):
        print(f' {expected_regex_line=}', flush=1)
        print(f' {actual_line=}', flush=1)
        assert re.match(expected_regex_line, actual_line)
    # zip() stops at the shorter sequence, so check lengths explicitly.
    assert len(expected_regexes) == len(actual), \
            f'expected/actual lines mismatch: {len(expected_regexes)=} {len(actual)=}.'
1060
def test_cli_out():
    '''
    Check redirection of messages and log diagnostics with environment
    variables PYMUPDF_LOG and PYMUPDF_MESSAGE.

    Runs the shell command `pymupdf internal` with different settings and
    compares its stdout/stderr (or the files it was told to write to) with
    expected regexes using check_lines().
    '''
    if not hasattr(pymupdf, 'mupdf'):
        print('test_cli_out(): Not running on classic because of fitz_old.')
        return
    import platform
    import re
    import subprocess
    # An extra diagnostic line is expected if PYMUPDF_USE_EXTRA is '0';
    # check_lines() ignores the `None` used otherwise.
    log_prefix = None
    if os.environ.get('PYMUPDF_USE_EXTRA') == '0':
        log_prefix = f'.+Using non-default setting from PYMUPDF_USE_EXTRA: \'0\''

    def check(
            expect_out,
            expect_err,
            message=None,
            log=None,
            verbose=0,
            ):
        '''
        Sets PYMUPDF_MESSAGE to `message` and PYMUPDF_LOG to `log`, runs
        `pymupdf internal`, and checks lines stdout and stderr match regexes in
        `expect_out` and `expect_err`. Note that we enclose regexes in `^...$`.
        '''
        env = dict()
        if log:
            env['PYMUPDF_LOG'] = log
        if message:
            env['PYMUPDF_MESSAGE'] = message
        # Our settings override any values inherited from the environment.
        env = os.environ | env
        print(f'Running with {env=}: pymupdf internal', flush=1)
        cp = subprocess.run(f'pymupdf internal', shell=1, check=1, capture_output=1, env=env, text=True)

        if verbose:
            #print(f'{cp.stdout=}.', flush=1)
            #print(f'{cp.stderr=}.', flush=1)
            sys.stdout.write(f'stdout:\n{textwrap.indent(cp.stdout, " ")}')
            sys.stdout.write(f'stderr:\n{textwrap.indent(cp.stderr, " ")}')
        check_lines(expect_out, cp.stdout)
        check_lines(expect_err, cp.stderr)

    #
    print(f'Checking default, all output to stdout.')
    check(
            [
                log_prefix,
                'This is from PyMuPDF message[(][)][.]',
                '.+This is from PyMuPDF log[(][)][.]',
            ],
            '',
            )

    #
    if platform.system() != 'Windows':
        print(f'Checking redirection of everything to /dev/null.')
        check('', '', 'path:/dev/null', 'path:/dev/null')

    #
    print(f'Checking redirection to files.')
    path_out = os.path.abspath(f'{__file__}/../../tests/test_cli_out.out')
    path_err = os.path.abspath(f'{__file__}/../../tests/test_cli_out.err')
    check('', '', f'path:{path_out}', f'path:{path_err}')
    def read(path):
        with open(path) as f:
            return f.read()
    out = read(path_out)
    err = read(path_err)
    check_lines(['This is from PyMuPDF message[(][)][.]'], out)
    check_lines([log_prefix, '.+This is from PyMuPDF log[(][)][.]'], err)

    #
    print(f'Checking redirection to fds.')
    check(
            [
                'This is from PyMuPDF message[(][)][.]',
            ],
            [
                log_prefix,
                '.+This is from PyMuPDF log[(][)][.]',
            ],
            'fd:1',
            'fd:2',
            )
1147
1148
def test_use_python_logging():
    '''
    Checks pymupdf.use_python_logging().

    Each scenario writes a small Python script to
    tests/resources_test_logging.py, runs it with the current interpreter, and
    compares the script's stdout and stderr with expected regexes using
    check_lines().
    '''
    # An extra diagnostic line is expected if PYMUPDF_USE_EXTRA is '0';
    # check_lines() ignores the `None` used otherwise.
    log_prefix = None
    if os.environ.get('PYMUPDF_USE_EXTRA') == '0':
        log_prefix = f'.+Using non-default setting from PYMUPDF_USE_EXTRA: \'0\''

    if os.path.basename(__file__).startswith(f'test_fitz_'):
        # Do nothing, because command `pymupdf` outputs diagnostics containing
        # `pymupdf` which are not renamed to `fitz`, which breaks our checking.
        print(f'Not testing with fitz alias.')
        return

    def check(
            code,
            regexes_stdout,
            regexes_stderr,
            env = None,
            ):
        '''
        Runs dedented `code` as a script and checks its stdout and stderr
        against `regexes_stdout` and `regexes_stderr`. If `env` is specified
        it is a dict of extra environment variables for the script.
        '''
        code = textwrap.dedent(code)
        path = os.path.abspath(f'{__file__}/../../tests/resources_test_logging.py')
        with open(path, 'w') as f:
            f.write(code)
        command = f'{sys.executable} {path}'
        if env:
            print(f'{env=}.')
            env = os.environ | env
        print(f'Running: {command}', flush=1)
        try:
            cp = subprocess.run(command, shell=1, check=1, capture_output=1, text=True, env=env)
        except subprocess.CalledProcessError as e:
            # Only CalledProcessError carries the captured output; any other
            # exception propagates unchanged.
            print(f'Command failed: {command}.', flush=1)
            print(f'Stdout\n{textwrap.indent(e.stdout, " ")}', flush=1)
            print(f'Stderr\n{textwrap.indent(e.stderr, " ")}', flush=1)
            raise
        check_lines(regexes_stdout, cp.stdout)
        check_lines(regexes_stderr, cp.stderr)

    print(f'## Basic use of `logging` sends output to stderr instead of default stdout.')
    check(
            '''
            import pymupdf
            pymupdf.message('this is pymupdf.message()')
            pymupdf.log('this is pymupdf.log()')
            pymupdf.set_messages(pylogging=1)
            pymupdf.set_log(pylogging=1)
            pymupdf.message('this is pymupdf.message() 2')
            pymupdf.log('this is pymupdf.log() 2')
            ''',
            [
                log_prefix,
                'this is pymupdf.message[(][)]',
                '.+this is pymupdf.log[(][)]',
            ],
            [
                'this is pymupdf.message[(][)] 2',
                '.+this is pymupdf.log[(][)] 2',
            ],
            )

    print(f'## Calling logging.basicConfig() makes logging output contain <LEVEL>:<name> prefixes.')
    check(
            '''
            import pymupdf

            import logging
            logging.basicConfig()
            pymupdf.set_messages(pylogging=1)
            pymupdf.set_log(pylogging=1)

            pymupdf.message('this is pymupdf.message()')
            pymupdf.log('this is pymupdf.log()')
            ''',
            [
                log_prefix,
            ],
            [
                'WARNING:pymupdf:this is pymupdf.message[(][)]',
                'WARNING:pymupdf:.+this is pymupdf.log[(][)]',
            ],
            )

    print(f'## Setting PYMUPDF_USE_PYTHON_LOGGING=1 makes PyMuPDF use logging on startup.')
    check(
            '''
            import pymupdf
            pymupdf.message('this is pymupdf.message()')
            pymupdf.log('this is pymupdf.log()')
            ''',
            '',
            [
                log_prefix,
                'this is pymupdf.message[(][)]',
                '.+this is pymupdf.log[(][)]',
            ],
            env = dict(
                    PYMUPDF_MESSAGE='logging:',
                    PYMUPDF_LOG='logging:',
                    ),
            )

    print(f'## Pass explicit logger to pymupdf.use_python_logging() with logging.basicConfig().')
    check(
            '''
            import pymupdf

            import logging
            logging.basicConfig()

            logger = logging.getLogger('foo')
            pymupdf.set_messages(pylogging_logger=logger, pylogging_level=logging.WARNING)
            pymupdf.set_log(pylogging_logger=logger, pylogging_level=logging.ERROR)

            pymupdf.message('this is pymupdf.message()')
            pymupdf.log('this is pymupdf.log()')
            ''',
            [
                log_prefix,
            ],
            [
                'WARNING:foo:this is pymupdf.message[(][)]',
                'ERROR:foo:.+this is pymupdf.log[(][)]',
            ],
            )

    print(f'## Check pymupdf.set_messages() pylogging_level args.')
    check(
            '''
            import pymupdf

            import logging
            logging.basicConfig(level=logging.DEBUG)
            logger = logging.getLogger('pymupdf')

            pymupdf.set_messages(pylogging_level=logging.CRITICAL)
            pymupdf.set_log(pylogging_level=logging.INFO)

            pymupdf.message('this is pymupdf.message()')
            pymupdf.log('this is pymupdf.log()')
            ''',
            [
                log_prefix,
            ],
            [
                'CRITICAL:pymupdf:this is pymupdf.message[(][)]',
                'INFO:pymupdf:.+this is pymupdf.log[(][)]',
            ],
            )

    print(f'## Check messages() with sys.stdout=None.')
    check(
            '''
            import sys
            sys.stdout = None
            import pymupdf

            pymupdf.message('this is pymupdf.message()')
            pymupdf.log('this is pymupdf.log()')
            ''',
            [],
            [],
            )
1312
1313
def relpath(path, start=None):
    '''
    Like os.path.relpath(), but falls back to returning os.path.abspath(path)
    when os.path.relpath() raises ValueError. This is expected to happen only
    on Windows, when <path> and <start> are on different drives.
    '''
    try:
        result = os.path.relpath(path, start)
    except ValueError:
        # A relative path cannot cross drives.
        assert platform.system() == 'Windows'
        result = os.path.abspath(path)
    return result
1325
1326
def test_open():
    '''
    Checks that pymupdf.open() succeeds for valid input files, and raises the
    expected exception type with the expected text for various invalid
    filename/stream/filetype arguments.
    '''
    if not hasattr(pymupdf, 'mupdf'):
        print('test_open(): not running on classic.')
        return

    import re
    import textwrap
    import traceback

    resources = relpath(os.path.abspath(f'{__file__}/../../tests/resources'))

    # Use `/` everywhere instead of os.sep, so that backslashes in Windows
    # paths cannot interfere with our regexes.
    resources = resources.replace(os.sep, '/')

    def check(filename=None, stream=None, filetype=None, exception=None):
        '''
        Calls pymupdf.open(). If `exception` is not specified, returns the
        opened document. Otherwise `exception` is (etype, eregex) and we
        assert that an exception of type `etype` is raised whose formatted
        text matches `eregex`; `eregex` can also be a tuple/list of regexes
        that have to occur in order.
        '''
        if isinstance(filename, str):
            filename = filename.replace(os.sep, '/')
        if not exception:
            return pymupdf.open(filename=filename, stream=stream, filetype=filetype)
        etype, eregex = exception
        if isinstance(eregex, (tuple, list)):
            # Allow arbitrary text between the individual regexes.
            eregex = '.*'.join(eregex)
        try:
            pymupdf.open(filename=filename, stream=stream, filetype=filetype)
        except etype as e:
            text = traceback.format_exc(limit=0).replace(os.sep, '/')
            text = textwrap.indent(text, ' ', lambda line: 1)
            assert re.search(eregex, text, re.DOTALL), \
                    f'Incorrect exception text, expected {eregex=}, received:\n{text}'
            print(f'Received expected exception for {filename=} {stream=} {filetype=}:\n{text}')
        except Exception as e:
            assert 0, \
                    f'Incorrect exception, expected {etype}, received {type(e)=}.'
        else:
            assert 0, f'Did not received exception, expected {etype=}. {filename=} {stream=} {filetype=} {exception=}'

    # Valid documents.
    check(f'{resources}/1.pdf')
    check(f'{resources}/Bezier.epub')

    # Filename is not a string.
    path = 1234
    eregex = re.escape(f'bad filename: type(filename)=<class \'int\'> filename={path}.')
    check(path, exception=(TypeError, eregex))

    # File does not exist.
    path = 'test_open-this-file-will-not-exist'
    eregex = f'no such file: \'{path}\''
    check(path, exception=(pymupdf.FileNotFoundError, eregex))

    # Path is a directory.
    path = resources
    eregex = re.escape(f'\'{path}\' is no file')
    check(path, exception=(pymupdf.FileDataError, eregex))

    # Empty file; created here.
    path = relpath(os.path.abspath(f'{resources}/../test_open_empty'))
    path = path.replace(os.sep, '/')
    with open(path, 'w') as f:
        pass
    eregex = re.escape(f'Cannot open empty file: filename={path!r}.')
    check(path, exception=(pymupdf.EmptyFileError, eregex))

    # Pdf file opened with filetype 'xps'.
    path = f'{resources}/1.pdf'
    filetype = 'xps'
    etype = pymupdf.FileDataError
    # 2023-12-12: On OpenBSD, for some reason the SWIG catch code only catches
    # the exception as FzErrorBase.
    etype2 = 'FzErrorBase' if platform.system() == 'OpenBSD' else 'FzErrorFormat'
    eregex = (
            # With a sysinstall with separate MuPDF install, we get
            # `mupdf.FzErrorFormat` instead of `pymupdf.mupdf.FzErrorFormat`. So
            # we just search for the former.
            re.escape(f'mupdf.{etype2}: code=7: cannot recognize zip archive'),
            re.escape(f'pymupdf.FileDataError: Failed to open file {path!r} as type {filetype!r}.'),
            )
    # NOTE(review): `etype` and `eregex` above are not passed on, so this
    # call requires the open to succeed - confirm that this is intended.
    check(path, filetype=filetype, exception=None)

    # File with unrecognised content.
    path = f'{resources}/chinese-tables.pickle'
    etype2 = 'FzErrorBase' if platform.system() == 'OpenBSD' else 'FzErrorUnsupported'
    etext = (
            re.escape(f'mupdf.{etype2}: code=6: cannot find document handler for file: {path}'),
            re.escape(f'pymupdf.FileDataError: Failed to open file {path!r}.'),
            )
    check(path, exception=(pymupdf.FileDataError, etext))

    # Stream is not bytes-like.
    stream = 123
    etext = re.escape('bad stream: type(stream)=<class \'int\'>.')
    check(stream=stream, exception=(TypeError, etext))

    # Empty stream.
    check(stream=b'', exception=(pymupdf.EmptyFileError, re.escape('Cannot open empty stream.')))
1429
1430
def test_open2():
    '''
    Checks behaviour of fz_open_document() and fz_open_document_with_stream()
    with different filenames/magic values.

    Each tests/resources/test_open2.* file is opened under every extension
    found among those files (plus '.txt' and ''), both as a file and as a
    stream. The resulting document formats are written to
    tests/test_open2.html and tests/test_open2.json, and with MuPDF >= 1.26
    are compared with tests/resources/test_open2_expected.json.
    '''
    if platform.system() == 'Windows':
        print(f'test_open2(): not running on Windows because `git ls-files` known fail on Github Windows runners.')
        return

    root = os.path.normpath(f'{__file__}/../..')
    root = relpath(root)

    # Find tests/resources/test_open2.* input files/streams. We calculate
    # paths relative to the PyMuPDF checkout directory <root>, to allow use
    # of tests/resources/test_open2_expected.json regardless of the actual
    # checkout directory.
    print()
    sys.path.append(root)
    try:
        import pipcl
    finally:
        # Undo our append() above; we must not remove an unrelated entry such
        # as sys.path[0].
        sys.path.remove(root)
    paths = pipcl.git_items(f'{root}/tests/resources')
    paths = fnmatch.filter(paths, f'test_open2.*')
    paths = [f'tests/resources/{i}' for i in paths]

    # Get list of extensions of input files.
    extensions = {'.txt', ''}
    for path in paths:
        _, ext = os.path.splitext(path)
        extensions.add(ext)
    extensions = sorted(extensions)

    def get_result(e, document):
        '''
        Return fz_lookup_metadata(document, 'format') or [ERROR].
        '''
        if e:
            return f'[error]'
        else:
            try:
                return pymupdf.mupdf.fz_lookup_metadata2(document, 'format')
            except Exception:
                return ''

    def dict_set_path(dict_, *items):
        # Sets dict_[items[0]][items[1]]...[items[-2]] = items[-1], creating
        # intermediate dicts as required.
        for item in items[:-2]:
            dict_ = dict_.setdefault(item, dict())
        dict_[items[-2]] = items[-1]

    results = dict()

    # Prevent warnings while we are running.
    _g_out_message = pymupdf._g_out_message
    pymupdf._g_out_message = None
    try:
        for path in paths:
            print(path)
            for ext in extensions:
                path2 = f'{root}/foo{ext}'
                path3 = shutil.copy2(f'{root}/{path}', path2)
                assert(path3 == path2)

                # Test fz_open_document().
                e = None
                document = None
                try:
                    document = pymupdf.mupdf.fz_open_document(path2)
                except Exception as ee:
                    e = ee
                # The return value is not needed here; we only make the call.
                pymupdf.TOOLS.mupdf_warnings()
                text = get_result(e, document)
                print(f' fz_open_document({path2}) => {text}')
                dict_set_path(results, path, ext, 'file', text)

                # Test fz_open_document_with_stream().
                e = None
                document = None
                with open(f'{root}/{path}', 'rb') as f:
                    data = f.read()
                stream = pymupdf.mupdf.fz_open_memory(pymupdf.mupdf.python_buffer_data(data), len(data))
                try:
                    document = pymupdf.mupdf.fz_open_document_with_stream(ext, stream)
                except Exception as ee:
                    e = ee
                pymupdf.TOOLS.mupdf_warnings()
                text = get_result(e, document)
                print(f' fz_open_document_with_stream(magic={ext!r}) => {text}')
                dict_set_path(results, path, ext, 'stream', text)

    finally:
        pymupdf._g_out_message = _g_out_message

    # Create html table.
    path_html = os.path.normpath(f'{__file__}/../../tests/test_open2.html')
    with open(path_html, 'w') as f:
        f.write(f'<html>\n')
        f.write(f'<body>\n')
        # Spelt out equivalent of the non-portable "%F-%T".
        f.write(f'<p>{time.strftime("%Y-%m-%d-%H:%M:%S")}\n')
        f.write(f'<table border="1" style="border-collapse:collapse" cellpadding="4">\n')
        f.write(f'<tr><td></td><th colspan="{len(extensions)}">Extension/magic')
        f.write(f'<tr><th style="border-bottom: 4px solid black; border-right: 4px solid black;">Data file</th>')
        for ext in extensions:
            f.write(f'<th style="border-bottom: 4px solid black;">{ext}</th>')
        f.write('\n')
        for path in sorted(results.keys()):
            _, ext = os.path.splitext(path)
            f.write(f'<tr><th style="border-right: 4px solid black;">{os.path.basename(path)}</th>')
            for ext2 in sorted(results[path].keys()):
                text_file = results[path][ext2]['file']
                text_stream = results[path][ext2]['stream']
                # Use bold where the extension/magic is the file's own extension.
                b1, b2 = ('<b>', '</b>') if ext2==ext else ('', '')
                if text_file == text_stream:
                    if text_file == '[error]':
                        f.write(f'<td><div style="color: #808080;">{b1}{text_file}{b2}</div></td>')
                    else:
                        f.write(f'<td>{b1}{text_file}{b2}</td>')
                else:
                    f.write(f'<td>file: {b1}{text_file}{b2}<br>')
                    f.write(f'stream: {b1}{text_stream}{b2}</td>')
            f.write('</tr>\n')
        f.write(f'</table>\n')
        f.write(f'</body>\n')
        f.write(f'</html>\n')
    print(f'Have created: {path_html}')

    path_out = os.path.normpath(f'{__file__}/../../tests/test_open2.json')
    with open(path_out, 'w') as f:
        json.dump(results, f, indent=4, sort_keys=1)

    if pymupdf.mupdf_version_tuple >= (1, 26):
        with open(os.path.normpath(f'{__file__}/../../tests/resources/test_open2_expected.json')) as f:
            results_expected = json.load(f)
        if results != results_expected:
            print(f'results != results_expected:')
            def show(r, name):
                text = json.dumps(r, indent=4, sort_keys=1)
                print(f'{name}:')
                print(textwrap.indent(text, ' '))
            show(results_expected, 'results_expected')
            show(results, 'results')
            assert 0
1577
1578
def test_533():
    '''
    Checks that pages of resources/2.pdf can be iterated via the document
    itself, via a list of the document, and via a slice of the document.
    '''
    rebased = hasattr(pymupdf, 'mupdf')
    if not rebased:
        print('test_533(): Not running on classic.')
        return
    pdf_path = os.path.abspath(f'{__file__}/../../tests/resources/2.pdf')
    doc = pymupdf.open(pdf_path)
    print()
    # Direct iteration.
    for p in doc:
        print(f'test_533(): for p in doc: {p=}.')
    # Iteration over a copy of a list of pages.
    for p in list(doc)[:]:
        print(f'test_533(): for p in list(doc)[:]: {p=}.')
    # Iteration over a slice of the document.
    for p in doc[:]:
        print(f'test_533(): for p in doc[:]: {p=}.')
1592
def test_3354():
    '''
    Checks that a dict assigned to Document.metadata is read back unchanged.
    '''
    doc = pymupdf.open(filename)
    new_metadata = {'foo': 'bar'}
    doc.metadata = new_metadata
    assert doc.metadata == new_metadata
1598
def test_scientific_numbers():
    '''
    This is #3381.

    Inserts text at a point with very small coordinates and checks that the
    page contents do not contain b" 1e-".
    '''
    document = pymupdf.open()
    new_page = document.new_page(width=595, height=842)
    tiny_point = pymupdf.Point(1e-11, -1e-10)
    new_page.insert_text(tiny_point, "Test")
    contents = new_page.read_contents()
    print(f'{contents=}')
    assert b" 1e-" not in contents
1610
def test_3615():
    '''
    Checks that Document.pagemode and Document.pagelayout can be read for
    test_3615.epub, and that MuPDF warnings were generated.
    '''
    print('')
    print(f'{pymupdf.pymupdf_version=}', flush=1)
    print(f'{pymupdf.VersionBind=}', flush=1)
    epub_path = os.path.normpath(f'{__file__}/../../tests/resources/test_3615.epub')
    document = pymupdf.open(epub_path)
    print(document.pagemode)
    print(document.pagelayout)
    warnings_text = pymupdf.TOOLS.mupdf_warnings()
    # We require that some warning text is present.
    assert warnings_text
1621
def test_3654():
    '''
    Checks that text can be extracted from all pages of test_3654.docx
    without raising. The extracted text itself is not checked.
    '''
    docx_path = os.path.normpath(f'{__file__}/../../tests/resources/test_3654.docx')
    with pymupdf.open(docx_path) as document:
        # Each page's text is followed by an empty line.
        content = ''.join([page.get_text() + '\n\n' for page in document])
    content = content.strip()
1629
def test_3727():
    '''
    Checks that every page of test_3727.pdf can be rendered with a 2x2
    scaling matrix without raising.
    '''
    pdf_path = os.path.normpath(f'{__file__}/../../tests/resources/test_3727.pdf')
    document = pymupdf.open(pdf_path)
    for page in document:
        zoom = pymupdf.Matrix(2,2)
        page.get_pixmap(matrix=zoom)
1635
def test_3569():
    '''
    Checks the SVG text generated by Page.get_svg_image(text_as_path=False)
    for page 0 of test_3569.pdf, and the MuPDF warnings that are generated.
    '''
    path = os.path.normpath(f'{__file__}/../../tests/resources/test_3569.pdf')
    document = pymupdf.open(path)
    page = document[0]
    svg = page.get_svg_image(text_as_path=False)
    print(f'{svg=}')
    # Expected output depends on the MuPDF version in exactly one place:
    # before 1.27 one of the stroked paths has an explicit zero stroke width.
    if pymupdf.mupdf_version_tuple >= (1, 27):
        zero_stroke_width = ''
    else:
        zero_stroke_width = 'stroke-width="0" '
    path_transform = '<path transform="matrix(0,-.06,-.06,-0,3024,2160)" '
    svg_expected = ''.join([
            '<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape" version="1.1" width="3024" height="2160" viewBox="0 0 3024 2160">\n',
            '<defs>\n',
            '<clipPath id="clip_1">\n',
            path_transform + 'd="M25432 10909H29692V15642H25432V10909"/>\n',
            '</clipPath>\n',
            '<clipPath id="clip_2">\n',
            path_transform + 'd="M28526 38017 31807 40376V40379L31312 41314V42889H28202L25092 42888V42887L28524 38017H28526"/>\n',
            '</clipPath>\n',
            '</defs>\n',
            '<g clip-path="url(#clip_1)">\n',
            '<g inkscape:groupmode="layer" inkscape:label="CED - Text">\n',
            '<text xml:space="preserve" transform="matrix(.06 0 0 .06 3024 2160)" font-size="174.644" font-family="ArialMT"><tspan y="-28538" x="-14909 -14841.063 -14773.127 -14676.024 -14578.922 -14520.766 -14423.663">**L1-13</tspan></text>\n',
            '</g>\n',
            '</g>\n',
            '<g clip-path="url(#clip_2)">\n',
            '<g inkscape:groupmode="layer" inkscape:label="Level 03|S-COLS">\n',
            path_transform + 'd="M31130 41483V42083L30530 41483ZM31130 42083 30530 41483V42083Z" fill="#7f7f7f"/>\n',
            path_transform + zero_stroke_width + 'stroke-linecap="butt" stroke-miterlimit="10" stroke-linejoin="miter" fill="none" stroke="#7f7f7f" d="M31130 41483V42083L30530 41483ZM31130 42083 30530 41483V42083Z"/>\n',
            path_transform + 'stroke-width="9" stroke-linecap="round" stroke-linejoin="round" fill="none" stroke="#7f7f7f" d="M30530 41483H31130V42083H30530V41483"/>\n',
            '</g>\n',
            '</g>\n',
            '</svg>\n',
            ])
    assert svg == svg_expected
    wt = pymupdf.TOOLS.mupdf_warnings()
    assert wt == 'unknown cid collection: PDFAUTOCAD-Indentity0\nnon-embedded font using identity encoding: ArialMT (mapping via )\ninvalid marked content and clip nesting'
1694
def test_3450():
    '''
    Renders page 0 of test_3450.pdf at 150 dpi and prints the time taken.

    This issue is a slow-down, so we only show the time taken - it is not
    safe to fail if the test takes too long, because that can give spurious
    failures depending on hardware etc.

    On a mac-mini, PyMuPDF-1.24.8 takes 60s, PyMuPDF-1.24.9 takes 4s.
    '''
    on_valgrind = os.environ.get('PYMUPDF_RUNNING_ON_VALGRIND') == '1'
    if on_valgrind:
        print(f'test_3450(): not running on valgrind because very slow.', flush=1)
        return
    pdf_path = os.path.normpath(f'{__file__}/../../tests/resources/test_3450.pdf')
    document = pymupdf.open(pdf_path)
    first_page = document[0]
    t_start = time.time()
    pix = first_page.get_pixmap(alpha=False, dpi=150)
    t = time.time() - t_start
    print(f'test_3450(): {t=}')
1712
def test_3859():
    '''
    Checks that pymupdf.mupdf.PDF_NULL, PDF_TRUE and PDF_FALSE exist and are
    of type pymupdf.mupdf.PdfObj.
    '''
    print(f'{pymupdf.mupdf.PDF_NULL=}.')
    print(f'{pymupdf.mupdf.PDF_TRUE=}.')
    print(f'{pymupdf.mupdf.PDF_FALSE=}.')
    suffixes = ('NULL', 'TRUE', 'FALSE')
    for name in suffixes:
        name2 = f'PDF_{name}'
        v = getattr(pymupdf.mupdf, name2)
        print(f'{name=} {name2=} {v=} {type(v)=}')
        # Exact type is required, not merely an instance of a subclass.
        assert type(v)==pymupdf.mupdf.PdfObj, f'`v` is not a pymupdf.mupdf.PdfObj.'
1722
def test_3905():
    '''
    Checks that opening CSV-like bytes as a 'pdf' stream raises
    pymupdf.FileDataError, and checks the MuPDF warnings that are generated.
    '''
    csv_bytes = b'A,B,C,D\r\n1,2,1,2\r\n2,2,1,2\r\n'
    try:
        document = pymupdf.open(stream=csv_bytes, filetype='pdf')
    except pymupdf.FileDataError as e:
        print(f'test_3905(): e: {e}')
    else:
        # Opening must not succeed.
        assert 0
    # Wording of the first warning line depends on the MuPDF version.
    if pymupdf.mupdf_version_tuple >= (1, 26):
        wt_expected = 'format error: cannot find version marker\ntrying to repair broken xref\nrepairing PDF document'
    else:
        wt_expected = 'format error: cannot recognize version marker\ntrying to repair broken xref\nrepairing PDF document'
    wt = pymupdf.TOOLS.mupdf_warnings()
    assert wt == wt_expected
1736
def test_3624():
    '''
    Renders page 0 of test_3624.pdf at 2x scale, saves it as a png, and
    checks that it differs from the expected png by an rms of at most 1. On
    failure a difference image is saved to tests/test_3624_diff.png.
    '''
    tests_dir = f'{__file__}/../../tests'
    path = os.path.normpath(f'{tests_dir}/resources/test_3624.pdf')
    path_png_expected = os.path.normpath(f'{tests_dir}/resources/test_3624_expected.png')
    path_png = os.path.normpath(f'{tests_dir}/test_3624.png')
    with pymupdf.open(path) as document:
        first_page = document[0]
        pixmap = first_page.get_pixmap(matrix=pymupdf.Matrix(2, 2))
        print(f'Saving to {path_png=}.')
        pixmap.save(path_png)
        rms = gentle_compare.pixmaps_rms(path_png_expected, path_png)
        print(f'{rms=}')
        # We get small differences in sysinstall tests, where some thirdparty
        # libraries can differ.
        if rms > 1:
            path_png_diff = os.path.normpath(f'{tests_dir}/test_3624_diff.png')
            gentle_compare.pixmaps_diff(path_png_expected, path_png).save(path_png_diff)
            assert 0, f'{rms=}'
1755
1756
def test_4043():
    '''
    Checks that Document.fullcopy_page(1) can be called on test_4043.pdf
    without raising.
    '''
    pdf_path = os.path.normpath(f'{__file__}/../../tests/resources/test_4043.pdf')
    document = pymupdf.open(pdf_path)
    document.fullcopy_page(1)
1761
1762
def test_4018():
    '''
    Checks that iterating Document.pages(-1, -1) on a new, empty document
    does not raise.
    '''
    empty_doc = pymupdf.open()
    for _page in empty_doc.pages(-1, -1):
        pass
1767
def test_4034():
    '''
    Saves test_4034.pdf with clean=1 and compares renderings of page 0 of the
    original and the cleaned file.

    tests/resources/test_4034.pdf is first two pages of input file in
    https://github.com/pymupdf/PyMuPDF/issues/4034.
    '''
    path = os.path.normpath(f'{__file__}/../../tests/resources/test_4034.pdf')
    path_clean = os.path.normpath(f'{__file__}/../../tests/test_4034_out.pdf')
    with pymupdf.open(path) as original:
        pixmap1 = original[0].get_pixmap()
        original.save(path_clean, clean=1)
    with pymupdf.open(path_clean) as cleaned:
        page = cleaned[0]
        pixmap2 = cleaned[0].get_pixmap()
    rms = gentle_compare.pixmaps_rms(pixmap1, pixmap2)
    print(f'test_4034(): Comparison of original/cleaned page 0 pixmaps: {rms=}.')
    if pymupdf.mupdf_version_tuple >= (1, 25, 2):
        assert rms == 0
    else:
        # Older versions are expected to give differing renderings.
        assert 30 < rms < 50
1785
def test_4309():
    '''
    Checks that Document.delete_page() with no arguments can be called on a
    document that has one newly created page.
    '''
    doc = pymupdf.open()
    only_page = doc.new_page()
    doc.delete_page()
1790
def test_4263():
    '''
    Runs `pymupdf clean -linear` on test_4263.pdf and checks the exit status:
    success is required with MuPDF < 1.26, failure otherwise.
    '''
    path = os.path.normpath(f'{__file__}/../../tests/resources/test_4263.pdf')
    path_out = f'{path}.linerarized.pdf'
    cmd = f'pymupdf clean -linear {path} {path_out}'
    print(f'Running: {cmd}')
    # check=0 because we inspect the exit status ourselves.
    completed = subprocess.run(cmd, shell=1, check=0)
    if pymupdf.mupdf_version_tuple >= (1, 26):
        # Support for linearisation dropped in MuPDF-1.26.
        assert completed.returncode
    else:
        assert completed.returncode == 0
1802
def test_4224():
    '''
    Renders every page of test_4224.pdf at 150 dpi and saves each as a png
    next to the input file. With MuPDF < 1.25.5 also checks the MuPDF
    warnings that are generated.
    '''
    path = os.path.normpath(f'{__file__}/../../tests/resources/test_4224.pdf')
    with pymupdf.open(path) as document:
        for page in document.pages():
            png_path = f'{path}.{page.number}.png'
            page.get_pixmap(dpi=150).save(png_path)
            print(f'Have created: {png_path}')
    old_mupdf = pymupdf.mupdf_version_tuple < (1, 25, 5)
    if old_mupdf:
        warnings_text = pymupdf.TOOLS.mupdf_warnings()
        assert warnings_text == 'format error: negative code in 1d faxd\npadding truncated image'
1814
def test_4319():
    '''
    Creates and saves a one-page document, reopens it, accesses a page and
    the page count, closes it, and then removes the file.

    Have not seen this test reproduce issue #4319, but keeping it anyway.
    '''
    path = os.path.normpath(f'{__file__}/../../tests/resources/test_4319.pdf')

    # Create the file.
    doc = pymupdf.open()
    new_page = doc.new_page()
    new_page.insert_text((10, 100), "some text")
    doc.save(path)
    doc.close()

    # Reopen, use and close it again.
    doc = pymupdf.open(path)
    first_page = doc[0]
    n_pages = doc.page_count
    doc.close()

    os.remove(path)
    print(f"removed {doc.name=}")
1829
def test_3886():
    '''
    Saves test_3886.pdf with clean=0 and with clean=1, renders page 0 of the
    original and of both saved files, and prints the rms differences between
    the original rendering and each of the others. Nothing is asserted.
    '''
    resources = f'{__file__}/../../tests/resources'
    path = os.path.normpath(f'{resources}/test_3886.pdf')
    path_clean0 = os.path.normpath(f'{resources}/test_3886_clean0.pdf')
    path_clean1 = os.path.normpath(f'{resources}/test_3886_clean1.pdf')

    def render_first_page(pdf_path):
        # Returns pixmap of page 0 of the specified file.
        with pymupdf.open(pdf_path) as d:
            return d[0].get_pixmap()

    with pymupdf.open(path) as document:
        pixmap = document[0].get_pixmap()
        document.save(path_clean0, clean=0)

    with pymupdf.open(path) as document:
        document.save(path_clean1, clean=1)

    pixmap_clean0 = render_first_page(path_clean0)
    pixmap_clean1 = render_first_page(path_clean1)

    rms_0 = gentle_compare.pixmaps_rms(pixmap, pixmap_clean0)
    rms_1 = gentle_compare.pixmaps_rms(pixmap, pixmap_clean1)
    print(f'test_3886(): {rms_0=} {rms_1=}')
1851
def test_4415():
    '''
    Inserts text on page 0 of test_4415.pdf, using the page's derotation
    matrix for the insertion point and the page's rotation for the text, and
    checks that the rendered page is identical to the expected png.
    '''
    resources = f'{__file__}/../../tests/resources'
    path = os.path.normpath(f'{resources}/test_4415.pdf')
    path_out = os.path.normpath(f'{resources}/test_4415_out.png')
    path_out_expected = os.path.normpath(f'{resources}/test_4415_out_expected.png')
    with pymupdf.open(path) as document:
        page = document[0]
        rotation = page.rotation
        # Apparent insertion point.
        apparent_point = pymupdf.Point(100, 100)
        label = 'Text at Top-Left'
        # Matrix annihilating page rotation.
        derotation = page.derotation_matrix
        page.insert_text(apparent_point * derotation, label, fontsize=60, rotate=rotation)
        rendered = page.get_pixmap()
        rendered.save(path_out)
    rms = gentle_compare.pixmaps_rms(path_out_expected, path_out)
    assert rms == 0, f'{rms=}'
1867
def test_4466():
    """Render a 10x10 clip of every page and read `is_unicolor` on it.

    Reading `is_unicolor` on such a pixmap used to crash.
    """
    path = os.path.normpath(f'{__file__}/../../tests/test_4466.pdf')
    # Pixmap attributes reported for each page, in output order.
    reported = ('n', 'size', 'stride', 'width', 'height', 'x', 'y')
    with pymupdf.Document(path) as document:
        for page in document:
            print(f'page={page!r}', flush=1)
            pixmap = page.get_pixmap(clip=(0, 0, 10, 10))
            summary = ' '.join(
                f'pixmap.{attr}={getattr(pixmap, attr)!r}' for attr in reported
            )
            print(summary, flush=1)
            pixmap.is_unicolor  # Used to crash.
1876
1877
def test_4479():
    """Check `layer_ui_configs()` before and after switching layer 0 off."""
    # This passes with pymupdf-1.24.14, fails with pymupdf==1.25.*, passes with
    # pymupdf-1.26.0.
    print()
    path = os.path.normpath(f'{__file__}/../../tests/resources/test_4479.pdf')

    def expected_configs(off_layers):
        # Eight checkbox entries layer_0..layer_7; 'on' is 0 only for the
        # layer numbers listed in `off_layers`.
        return [
            {
                'depth': 0,
                'locked': 0,
                'number': number,
                'on': 0 if number in off_layers else 1,
                'text': f'layer_{number}',
                'type': 'checkbox',
            }
            for number in range(8)
        ]

    def show_and_check(document, off_layers):
        # Print the current UI configs, then compare with the expectation.
        items = document.layer_ui_configs()
        for item in items:
            print(f' {repr(item)}')
        assert items == expected_configs(off_layers)

    with pymupdf.open(path) as document:
        # Initially only layer 2 is off.
        show_and_check(document, (2,))

        # After switching layer 0 off, layers 0 and 2 are off.
        document.set_layer_ui_config(0, pymupdf.PDF_OC_OFF)
        show_and_check(document, (0, 2))
1915
1916
def test_4533():
    """Open the downloaded `test_4533.pdf` in a child interpreter.

    The failure mode is a segv, so the document is opened in a separate
    process and only the child's exit status is examined: it must be zero
    with MuPDF >= 1.26.6 and non-zero with older versions.
    """
    print()
    path = util.download(
            'https://github.com/user-attachments/files/20497146/NineData_user_manual_V3.0.5.pdf',
            'test_4533.pdf',
            size=16864501,
            )
    # This bug is a segv so we run the test in a child process.
    #
    # The command is passed as an argument list without a shell, so that
    # spaces or shell metacharacters in sys.executable or in the downloaded
    # path cannot split or alter the command.
    code = f'import pymupdf; document = pymupdf.open({path!r}); print(len(document))'
    command = [sys.executable, '-c', code]
    print(f'Running: {command}')
    cp = subprocess.run(command, check=False)
    e = cp.returncode
    print(f'{e=}')
    if pymupdf.mupdf_version_tuple >= (1, 26, 6):
        assert e == 0
    else:
        assert e != 0
1934
1935
def test_4564():
    """Print all metadata entries and check the 'producer' value.

    The expected producer string depends on the MuPDF version.
    """
    path = os.path.normpath(f'{__file__}/../../tests/resources/test_4564.pdf')
    print()
    # Pick the expected value up front; it differs from MuPDF 1.27 onwards.
    if pymupdf.mupdf_version_tuple >= (1, 27):
        expected_producer = 'Adobe PSL 1.3e for Canon\x00'
    else:
        expected_producer = 'Adobe PSL 1.3e for Canon\udcc0\udc80'
    with pymupdf.open(path) as document:
        metadata = document.metadata
        for key in sorted(metadata):
            print(f'{key}: {metadata[key]!r}')
        assert document.metadata['producer'] == expected_producer
1947
1948
def test_4496():
    """Open the `.hwpx` resource file and print its page count."""
    hwpx_path = os.path.normpath(f'{__file__}/../../tests/resources/test_4496.hwpx')
    with pymupdf.open(hwpx_path) as hwpx_document:
        page_count = hwpx_document.page_count
        print(page_count)
1953
1954
def test_gitinfo():
    """Print MuPDF / PyMuPDF version and git information.

    Nothing is asserted; the output is diagnostic only.
    """
    # This doesn't really test very much, but can be useful to see the current
    # values.
    print('')
    # Label the output with this test's own name (it previously printed
    # 'test_4496():', copied from the preceding test).
    print('test_gitinfo():')
    print(f'{pymupdf.mupdf_location=}')
    print(f'{pymupdf.mupdf_version=}')
    print(f'{pymupdf.pymupdf_git_branch=}')
    print(f'{pymupdf.pymupdf_git_sha=}')
    print(f'{pymupdf.pymupdf_version=}')
    print(f'pymupdf.pymupdf_git_diff:\n{textwrap.indent(pymupdf.pymupdf_git_diff, " ")}')
1966
1967
def test_4392():
    """Import pymupdf under pytest and under `-Werror`, then check exit codes.

    A one-line test module containing only `import pymupdf` is written and
    run with pytest (with and without `-Werror`); the import is also run
    directly with `python -Werror`. The accepted exit codes depend on the
    swig version and on the platform.
    """
    print()
    path = os.path.normpath(f'{__file__}/../../tests/test_4392.py')
    with open(path, 'w') as f:
        f.write('import pymupdf\n')

    def run_and_report(label, command):
        # Run `command` through the shell, print its exit code as
        # '<label>=<code>' and return the code.
        print(f'Running: {command}', flush=1)
        returncode = subprocess.run(command, shell=1, check=0).returncode
        print(f'{label}={returncode!r}')
        return returncode

    e1 = run_and_report('e1', f'pytest {path}')
    e2 = run_and_report('e2', f'pytest -Werror {path}')
    e3 = run_and_report('e3', f'{sys.executable} -Werror -c "import pymupdf"')

    print(f'{e1=} {e2=} {e3=}')

    print(f'{pymupdf.swig_version=}')
    print(f'{pymupdf.swig_version_tuple=}')

    assert e1 == 5
    if pymupdf.swig_version_tuple >= (4, 4):
        assert e2 == 5
        assert e3 == 0
    elif platform.system() == 'Windows':
        # We get SEGV's etc with older swig.
        assert (e2, e3) == (0xc0000005, 0xc0000005)
    else:
        # We get SEGV's etc with older swig.
        # On plain linux we get (139, 139). On manylinux we get (-11,
        # -11). On MacOS we get (-11, -11).
        assert (e2, e3) in ((139, 139), (-11, -11))
2006
2007
def test_4639():
    """Call `get_bboxlog(layers=True)` on the last page of the document."""
    pdf_path = os.path.normpath(f'{__file__}/../../tests/resources/test_4639.pdf')
    with pymupdf.open(pdf_path) as pdf:
        last_page = pdf[-1]
        last_page.get_bboxlog(layers=True)
2013
2014
def test_4590():
    """Build a PDF with a file attachment annotation, then scrub it.

    The PDF and the attached sample text file are both written next to the
    tests; the saved PDF is reopened and `Document.scrub()` is called on it.
    """
    # Create test PDF.
    path = os.path.normpath(f'{__file__}/../../tests/test_4590.pdf')
    with pymupdf.open() as document:
        page = document.new_page()

        # Add some text
        page.insert_text(
            (72, 72),
            'This PDF contains a file attachment annotation.',
            fontsize=12,
        )

        # Create a sample file, then read it back as bytes.
        path_sample = os.path.normpath(f'{__file__}/../../tests/test_4590_annotation_sample.txt')
        sample_file = pathlib.Path(path_sample)
        sample_file.write_text('This is a sample attachment file.')
        sample = sample_file.read_bytes()

        # Add the file attachment annotation. The position is given as the
        # PushPin icon rectangle.
        page.add_file_annot(
            point=pymupdf.Rect(72, 100, 92, 120),
            buffer_=sample,
            filename='sample.txt',
            ufilename='sample.txt',
            desc='A test attachment file.',
            icon='PushPin',
        )

        # Save the PDF
        document.save(path)

    # Check pymupdf.Document.scrub() works.
    with pymupdf.open(path) as saved_document:
        saved_document.scrub()