Mercurial > hgrepos > Python2 > PyMuPDF
comparison tests/test_general.py @ 3:2c135c81b16c
MERGE: upstream PyMuPDF 1.26.4 with MuPDF 1.26.7
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:44:09 +0200 |
| parents | 1d09e1dec1d9 |
| children | a6bc019ac0b2 |
comparison
equal
deleted
inserted
replaced
| 0:6015a75abc2d | 3:2c135c81b16c |
|---|---|
| 1 # encoding utf-8 | |
| 2 """ | |
| 3 * Confirm sample doc has no links and no annots. | |
| 4 * Confirm proper release of file handles via Document.close() | |
| 5 * Confirm properly raising exceptions in document creation | |
| 6 """ | |
| 7 import io | |
| 8 import os | |
| 9 | |
| 10 import fnmatch | |
| 11 import json | |
| 12 import pymupdf | |
| 13 import pathlib | |
| 14 import pickle | |
| 15 import platform | |
| 16 import re | |
| 17 import shutil | |
| 18 import subprocess | |
| 19 import sys | |
| 20 import textwrap | |
| 21 import time | |
| 22 import util | |
| 23 | |
| 24 import gentle_compare | |
| 25 | |
# Absolute directory holding this test module; fixtures live in "resources".
scriptdir = os.path.dirname(os.path.abspath(__file__))
# Sample PDF shared by the simple document-property tests.
filename = os.path.join(scriptdir, "resources", "001003ED.pdf")
| 28 | |
| 29 | |
def test_haslinks():
    """The sample document must report that it contains no links."""
    sample = pymupdf.open(filename)
    link_flag = sample.has_links()
    assert link_flag == False
| 33 | |
| 34 | |
def test_hasannots():
    """The sample document must report that it contains no annotations."""
    sample = pymupdf.open(filename)
    annot_flag = sample.has_annots()
    assert annot_flag == False
| 38 | |
| 39 | |
def test_haswidgets():
    """The sample document must not be flagged as a form PDF."""
    sample = pymupdf.open(filename)
    form_flag = sample.is_form_pdf
    assert form_flag == False
| 43 | |
| 44 | |
def test_isrepaired():
    """The sample document must open without being flagged as repaired."""
    sample = pymupdf.open(filename)
    repaired_flag = sample.is_repaired
    assert repaired_flag == False
    # Fetch the accumulated MuPDF warnings and discard them
    # (presumably to leave a clean slate for later tests - TODO confirm).
    pymupdf.TOOLS.mupdf_warnings()
| 49 | |
| 50 | |
def test_isdirty():
    """A freshly opened sample document must not be marked dirty."""
    sample = pymupdf.open(filename)
    dirty_flag = sample.is_dirty
    assert dirty_flag == False
| 54 | |
| 55 | |
def test_cansaveincrementally():
    """The sample document must report that incremental saving is possible."""
    sample = pymupdf.open(filename)
    incremental_ok = sample.can_save_incrementally()
    assert incremental_ok == True
| 59 | |
| 60 | |
def test_iswrapped():
    """Page 0 of the sample document must report wrapped contents.

    Also checks the MuPDF warning text, which depends on the MuPDF version.
    """
    sample = pymupdf.open(filename)
    first_page = sample[0]
    assert first_page.is_wrapped
    warnings_text = pymupdf.TOOLS.mupdf_warnings()
    if pymupdf.mupdf_version_tuple < (1, 26, 0):
        # Older MuPDF versions are expected to stay silent.
        assert not warnings_text
    else:
        assert warnings_text == 'bogus font ascent/descent values (0 / 0)'
| 70 | |
| 71 | |
def test_wrapcontents():
    """Wrap a page's contents, collapse them into one stream, then clean them.

    Afterwards the page must have exactly one /Contents object, and (on the
    rebased implementation) MuPDF must have emitted the expected warnings.
    """
    sample = pymupdf.open(filename)
    first_page = sample[0]
    first_page.wrap_contents()

    # Store the concatenated contents in the first stream and make it the
    # page's only /Contents object.
    target_xref = first_page.get_contents()[0]
    merged_stream = first_page.read_contents()
    sample.update_stream(target_xref, merged_stream)
    first_page.set_contents(target_xref)
    assert len(first_page.get_contents()) == 1

    first_page.clean_contents()

    rebased = hasattr(pymupdf, 'mupdf')
    if not rebased:
        return
    warnings_text = pymupdf.TOOLS.mupdf_warnings()
    if pymupdf.mupdf_version_tuple < (1, 26, 0):
        assert warnings_text == 'PDF stream Length incorrect'
    else:
        assert warnings_text == 'bogus font ascent/descent values (0 / 0)\nPDF stream Length incorrect'
| 89 | |
| 90 | |
def test_page_clean_contents():
    """Check that cleaning a page's contents really takes effect."""
    pdf = pymupdf.open()
    blank_page = pdf.new_page()

    # Each drawing call adds its own /Contents object, so we expect two.
    for box in ((10, 10, 20, 20), (20, 20, 30, 30)):
        blank_page.draw_rect(box)
    assert len(blank_page.get_contents()) == 2
    starts_with_q = blank_page.read_contents().startswith(b"q")
    assert starts_with_q == False

    # Cleaning must consolidate everything into a single /Contents object
    # whose stream begins with the b"q" operator.
    blank_page.clean_contents()
    assert len(blank_page.get_contents()) == 1
    starts_with_q = blank_page.read_contents().startswith(b"q")
    assert starts_with_q == True
| 106 | |
| 107 | |
def test_annot_clean_contents():
    """Check that cleaning an annotation's appearance stream takes effect."""
    pdf = pymupdf.open()
    blank_page = pdf.new_page()
    highlight = blank_page.add_highlight_annot((10, 10, 20, 20))

    # Clean and reformat the appearance stream ...
    highlight.clean_contents()

    # ... after which it must start with the b"q" operator.
    appearance = highlight._getAP()
    assert appearance.startswith(b"q") == True
| 122 | |
| 123 | |
def test_config():
    """The "py-memory" entry of the build configuration must be a boolean."""
    py_memory = pymupdf.TOOLS.fitz_config["py-memory"]
    assert py_memory in (True, False)
| 126 | |
| 127 | |
def test_glyphnames():
    """Glyph name -> unicode -> glyph name must round-trip."""
    glyph_name = "INFINITY"
    codepoint = pymupdf.glyph_name_to_unicode(glyph_name)
    round_tripped = pymupdf.unicode_to_glyph_name(codepoint)
    assert round_tripped == glyph_name
| 132 | |
| 133 | |
def test_rgbcodes():
    """White as an sRGB integer must convert to the expected color tuples."""
    white = 0xFFFFFF
    as_pdf = pymupdf.sRGB_to_pdf(white)
    assert as_pdf == (1, 1, 1)
    as_rgb = pymupdf.sRGB_to_rgb(white)
    assert as_rgb == (255, 255, 255)
| 138 | |
| 139 | |
def test_pdfstring():
    """Smoke test: PDF string helpers must accept non-ASCII text without raising."""
    cjk_text = "Beijing, chinesisch 北京"
    latin_text = "Latin characters êßöäü"

    pymupdf.get_pdf_now()
    pymupdf.get_pdf_str(cjk_text)
    pymupdf.get_text_length(cjk_text, fontname="china-s")
    pymupdf.get_pdf_str(latin_text)
| 145 | |
| 146 | |
def test_open_exceptions():
    """Check document opening with wrong file types and with invalid input.

    * A wrong or unknown ``filetype`` must not prevent the PDF from being
      recognized.
    * A missing file must raise an error whose repr starts with
      "FileNotFoundError".
    * An empty stream must raise a RuntimeError whose repr starts with
      "EmptyFileError".
    """
    path = os.path.normpath(f'{__file__}/../../tests/resources/001003ED.pdf')

    # The declared file type is wrong both times; the format must still be PDF.
    for declared_type in ("xps", "xxx"):
        doc = pymupdf.open(path, filetype=declared_type)
        assert 'PDF' in doc.metadata["format"]

    # Opening a non-existent file must fail.
    missing_file_error = None
    try:
        pymupdf.open("x.y")
    except Exception as e:
        missing_file_error = e
    if missing_file_error is None:
        assert 0
    assert repr(missing_file_error).startswith("FileNotFoundError")

    # Opening an empty in-memory stream must fail as well.
    try:
        pymupdf.open(stream=b"", filetype="pdf")
    except RuntimeError as e:
        assert repr(e).startswith("EmptyFileError"), f'{repr(e)=}'
    else:
        # Diagnostic output refers to the last successfully opened document.
        print(f'{doc.metadata["format"]=}')
        assert 0
| 169 | |
| 170 | |
def test_bug1945():
    """Saving bug1945.pdf with clean=True into a memory buffer must not raise."""
    sink = io.BytesIO()
    source = pymupdf.open(f'{scriptdir}/resources/bug1945.pdf')
    source.save(sink, clean=True)
| 175 | |
| 176 | |
def test_bug1971():
    """Open, read drawings from, and close bug1971.pdf twice in a row."""
    pdf_path = f'{scriptdir}/resources/bug1971.pdf'
    for _attempt in range(2):
        pdf = pymupdf.Document(pdf_path)
        first_page = next(pdf.pages())
        first_page.get_drawings()
        pdf.close()
        assert pdf.is_closed
| 184 | |
def test_default_font():
    """str() and repr() of the default Font must both name Noto Serif Regular."""
    expected = "Font('Noto Serif Regular')"
    default_font = pymupdf.Font()
    assert str(default_font) == expected
    assert repr(default_font) == expected
| 189 | |
def test_add_ink_annot():
    """Add an ink annotation (cosine and sine strokes) plus text, then save."""
    import math

    document = pymupdf.Document()
    page = document.new_page()

    # Two strokes sampled every 15 degrees over two full periods.
    angles = range(0, 360 * 2, 15)
    cosine_stroke = [(a, 300 + 200 * math.cos(a * math.pi / 180)) for a in angles]
    sine_stroke = [(a, 300 + 100 * math.sin(a * math.pi / 180)) for a in angles]

    page.add_ink_annot([cosine_stroke, sine_stroke])
    page.insert_text((100, 72), 'Hello world')
    page.add_text_annot((200, 200), "Some Text")
    page.get_bboxlog()

    out_path = f'{scriptdir}/resources/test_add_ink_annot.pdf'
    document.save(out_path)
    print(f'Have saved to: path={out_path!r}')
| 209 | |
def test_techwriter_append():
    """Write text with a TextWriter, mark its text rect and last point, save."""
    print(pymupdf.__doc__)
    pdf = pymupdf.open()
    page = pdf.new_page()

    writer = pymupdf.TextWriter(page.rect)
    caption = "Red rectangle = TextWriter.text_rect, blue circle = .last_point"
    append_result = writer.append((100, 100), caption)
    print(f'r={append_result!r}')
    writer.write_text(page)

    # Visualize where the writer thinks the text is and where it ended.
    red = pymupdf.pdfcolor["red"]
    page.draw_rect(writer.text_rect, color=red)
    blue = pymupdf.pdfcolor["blue"]
    page.draw_circle(writer.last_point, 2, color=blue)

    out_path = f"{scriptdir}/resources/test_techwriter_append.pdf"
    pdf.ez_save(out_path)
    print(f'Have saved to: {out_path}')
| 224 | |
def test_opacity():
    """Create two overlapping semi-transparent circle annotations and save."""
    pdf = pymupdf.open()
    page = pdf.new_page()

    # (bounding box, fill/stroke color, opacity) for each circle.
    circle_specs = (
        ((50, 50, 100, 100), (1, 0, 0), 2 / 3),
        ((75, 75, 125, 125), (0, 0, 1), 1 / 3),
    )
    circles = []  # keep references alive until the document is saved
    for bounds, color, opacity in circle_specs:
        circle = page.add_circle_annot(bounds)
        circle.set_colors(fill=color, stroke=color)
        circle.set_opacity(opacity)
        circle.update(blend_mode="Multiply")
        circles.append(circle)

    outfile = f'{scriptdir}/resources/opacity.pdf'
    pdf.save(outfile, expand=True, pretty=True)
    print("saved", outfile)
| 241 | |
def test_get_text_dict():
    """The "dict" text extraction must be JSON-serializable."""
    import json

    pdf = pymupdf.open(f'{scriptdir}/resources/v110-changes.pdf')
    first_page = pdf[0]
    text_blocks = first_page.get_text("dict")["blocks"]
    # Serialization fails if any opaque (non-JSON) type is in the blocks.
    json.dumps(text_blocks, indent=4)

    warnings_text = pymupdf.TOOLS.mupdf_warnings()
    if pymupdf.mupdf_version_tuple < (1, 26, 0):
        assert not warnings_text
    else:
        assert warnings_text == 'bogus font ascent/descent values (0 / 0)'
| 254 | |
def test_font():
    """Smoke test: print the default font and the bbox of glyph 65."""
    default_font = pymupdf.Font()
    print(repr(default_font))
    glyph_box = default_font.glyph_bbox(65)
    print(f'bbox={glyph_box!r}')
| 260 | |
def test_insert_font():
    """Smoke test: insert the default font into a page and print the result."""
    pdf = pymupdf.open(f'{scriptdir}/resources/v110-changes.pdf')
    first_page = pdf[0]
    insert_result = first_page.insert_font()
    print(f'page.insert_font() => {insert_result}')
| 266 | |
def test_2173():
    """Repeatedly create (and drop) small RGB pixmaps; must not crash."""
    from pymupdf import CS_RGB, Colorspace, IRect, Pixmap

    for _round in range(100):
        # Each new pixmap replaces the previous one.
        image = Pixmap(Colorspace(CS_RGB), IRect(0, 0, 13, 37))
    print('test_2173() finished')
| 273 | |
def test_texttrace():
    """Time text-trace extraction over joined.pdf, then dump all traces as JSON."""
    import json
    import time

    document = pymupdf.Document(f'{scriptdir}/resources/joined.pdf')

    # First pass: only measure how long the extraction takes.
    started = time.time()
    for page in document:
        page.get_texttrace()
    elapsed = time.time() - started
    print(f'test_texttrace(): t={elapsed!r}')

    # Second pass: write each page's trace to a text file.
    path = f'{scriptdir}/resources/test_texttrace.txt'
    print(f'test_texttrace(): Writing to: {path}')
    with open(path, 'w') as out:
        for page_number, page in enumerate(document):
            trace = page.get_texttrace()
            out.write(f'page {page_number} json:\n{json.dumps(trace, indent=" ")}\n')
| 291 | |
| 292 | |
def test_2533():
    """Assert correct char bbox in page.get_texttrace().

    Search for a unique char on page and confirm that page.get_texttrace()
    returns the same bbox as the search method.

    The check runs with small glyph heights enabled; the setting is switched
    back off afterwards, even if the test fails.
    """
    if hasattr(pymupdf, 'mupdf') and not pymupdf.g_use_extra:
        print('Not running test_2533() because rebased with use_extra=0 known to fail')
        return
    pymupdf.TOOLS.set_small_glyph_heights(True)
    try:
        doc = pymupdf.open(os.path.join(scriptdir, "resources", "test_2533.pdf"))
        page = doc[0]
        NEEDLE = "民"
        ord_NEEDLE = ord(NEEDLE)
        # Start from None so that a missing needle is reported as a clear
        # assertion failure instead of a NameError further down.
        bbox = None
        for span in page.get_texttrace():
            for char in span["chars"]:
                if char[0] == ord_NEEDLE:
                    bbox = pymupdf.Rect(char[3])
                    break
        assert bbox is not None, f'{NEEDLE!r} not found in page.get_texttrace()'
        bbox2 = page.search_for(NEEDLE)[0]
        assert bbox2 == bbox, f'{bbox=} {bbox2=} {bbox2-bbox=}.'
    finally:
        pymupdf.TOOLS.set_small_glyph_heights(False)
| 317 | |
| 318 | |
def test_2645():
    """Font sizes from get_texttrace() and get_text("dict") must agree.

    Checked on the first span of page 0 of three corner-case files.
    """
    resources = os.path.join(scriptdir, "resources")
    for pdf_name in ("test_2645_1.pdf", "test_2645_2.pdf", "test_2645_3.pdf"):
        pdf = pymupdf.open(os.path.join(resources, pdf_name))
        first_page = pdf[0]

        trace_size = first_page.get_texttrace()[0]["size"]

        text_dict = first_page.get_text("dict", flags=pymupdf.TEXTFLAGS_TEXT)
        first_span = text_dict["blocks"][0]["lines"][0]["spans"][0]
        dict_size = first_span["size"]

        assert abs(trace_size - dict_size) < 1e-5
| 332 | |
| 333 | |
def test_2506():
    """Text written at several angles must keep the requested font size."""
    pdf = pymupdf.open()
    page = pdf.new_page()
    anchor = pymupdf.Point(100, 300)  # insertion point, also the rotation pivot
    fontsize = 11
    sample_text = "Hello"

    # Insert the same text once per rotation angle.
    for angle in (0, 30, 60, 90, 120):
        rotation = (anchor, pymupdf.Matrix(angle))
        page.insert_text(anchor, sample_text, fontsize=fontsize, morph=rotation)

    # get_texttrace(): sizes must match after rounding.
    for trace_span in page.get_texttrace():
        print(trace_span["dir"])
        assert round(trace_span["size"]) == fontsize

    # get_text("dict"): sizes must match after rounding as well.
    for block in page.get_text("dict")["blocks"]:
        for line in block["lines"]:
            print(line["dir"])
            for dict_span in line["spans"]:
                print(dict_span["size"])
                assert round(dict_span["size"]) == fontsize
| 361 | |
| 362 | |
def test_2108():
    """Redact the "{sig}" placeholder and compare the remaining page text."""
    doc = pymupdf.open(f'{scriptdir}/resources/test_2108.pdf')
    page = doc[0]

    # Redact the first occurrence of the placeholder.
    placeholder_rect = page.search_for("{sig}")[0]
    page.add_redact_annot(placeholder_rect)
    page.apply_redactions()
    text = page.get_text()

    text_expected = b'Frau\nClaire Dunphy\nTeststra\xc3\x9fe 5\n12345 Stadt\nVertragsnummer: 12345\nSehr geehrte Frau Dunphy,\nText\nMit freundlichen Gr\xc3\xbc\xc3\x9fen\nTestfirma\nVertrag:\n 12345\nAnsprechpartner:\nJay Pritchet\nTelefon:\n123456\nE-Mail:\ntest@test.de\nDatum:\n07.12.2022\n'.decode('utf8')

    # Verbose diagnostics: actual and expected text in three renderings,
    # separated by blank lines.
    diagnostics = (
        f'test_2108(): text is:\n{text}',
        f'test_2108(): repr(text) is:\n{text!r}',
        f'test_2108(): repr(text.encode("utf8")) is:\n{text.encode("utf8")!r}',
        f'test_2108(): text_expected is:\n{text_expected}',
        f'test_2108(): repr(text_expected) is:\n{text_expected!r}',
        f'test_2108(): repr(text_expected.encode("utf8")) is:\n{text_expected.encode("utf8")!r}',
    )
    print('\n\n'.join(diagnostics))

    # Three ways of comparing, reported for debugging only.
    actual_bytes = text.encode("utf8")
    expected_bytes = text_expected.encode("utf8")
    ok1 = (text == text_expected)
    ok2 = (actual_bytes == expected_bytes)
    ok3 = (repr(actual_bytes) == repr(expected_bytes))

    print()
    print(f'ok1={ok1}')
    print(f'ok2={ok2}')
    print(f'ok3={ok3}')
    print()

    print(f'{pymupdf.mupdf_version_tuple=}')
    if pymupdf.mupdf_version_tuple < (1, 21, 2):
        print('Asserting text!=text_expected')
        assert text != text_expected
    else:
        print('Asserting text==text_expected')
        assert text == text_expected
| 406 | |
| 407 | |
def test_2238():
    """Open a damaged PDF, check repair warnings and first/last page text."""
    filepath = f'{scriptdir}/resources/test2238.pdf'
    doc = pymupdf.open(filepath)

    rebased = hasattr(pymupdf, 'mupdf')
    if rebased:
        wt = pymupdf.TOOLS.mupdf_warnings()
        # The leading warning lines depend on the MuPDF version; the repair
        # messages at the end are common to both.
        if pymupdf.mupdf_version_tuple >= (1, 26):
            expected_lines = [
                'garbage bytes before version marker',
                'syntax error: expected \'obj\' keyword (6 0 ?)',
            ]
        else:
            expected_lines = ['format error: cannot recognize version marker']
        expected_lines += ['trying to repair broken xref', 'repairing PDF document']
        wt_expected = '\n'.join(expected_lines)
        assert wt == wt_expected, f'{wt=}'

    # Extract first and last page text, once with an infinite clip rectangle
    # and once without any clip.
    for use_infinite_clip in (True, False):
        extracted = []
        for page_number in (0, -1):
            page = doc.load_page(page_number)
            if use_infinite_clip:
                extracted.append(page.get_text('text', clip=pymupdf.INFINITE_RECT()))
            else:
                extracted.append(page.get_text('text'))
        first_page, last_page = extracted

        print(f'first_page={first_page!r}')
        print(f'last_page={last_page!r}')
        assert first_page == 'Hello World\n'
        assert last_page == 'Hello World\n'
| 438 | |
| 439 | |
def test_2093():
    """Applying a small redaction must barely change the page's average color."""
    if platform.python_implementation() == 'GraalVM':
        print(f'test_2093(): Not running because slow on GraalVM.')
        return

    doc = pymupdf.open(f'{scriptdir}/resources/test2093.pdf')

    def average_color(page):
        """Render *page* and return the per-channel mean over all pixels."""
        pixmap = page.get_pixmap()
        channels = pixmap.n
        totals = [0] * channels
        for y in range(pixmap.height):
            for x in range(pixmap.width):
                pixel = pixmap.pixel(x, y)
                for channel in range(channels):
                    totals[channel] += pixel[channel]
        pixel_count = pixmap.height * pixmap.width
        return [total / pixel_count for total in totals]

    page = doc.load_page(0)
    pixel_average_before = average_color(page)

    # Redaction rectangle given as origin plus width / height.
    rx, ry = 135.123, 123.56878
    rw, rh = 69.8409, 9.46397
    rect = pymupdf.Rect(rx, ry, rx + rw, ry + rh)

    font = pymupdf.Font("Helvetica")
    fill_color = (0, 0, 0)
    # No replacement text is passed.
    page.add_redact_annot(
        quad=rect,
        fontname=font.name,
        fontsize=12,
        align=pymupdf.TEXT_ALIGN_CENTER,
        fill=fill_color,
        text_color=(1, 1, 1),
    )

    page.apply_redactions()
    pixel_average_after = average_color(page)

    print(f'pixel_average_before={pixel_average_before!r}')
    print(f'pixel_average_after={pixel_average_after!r}')

    # Before this bug was fixed (MuPDF-1.22):
    #   pixel_average_before=[130.864323120088, 115.23577810900859, 92.9268559996174]
    #   pixel_average_after=[138.68844553555772, 123.05687162237561, 100.74275056194105]
    # After fix:
    #   pixel_average_before=[130.864323120088, 115.23577810900859, 92.9268559996174]
    #   pixel_average_after=[130.8889209934799, 115.25722751837269, 92.94327384463327]
    for before, after in zip(pixel_average_before, pixel_average_after):
        assert abs(before - after) < 0.1

    out = f'{scriptdir}/resources/test2093-out.pdf'
    doc.save(out)
    print(f'Have written to: {out}')
| 506 | |
| 507 | |
def test_2182():
    """Smoke test: iterating and printing a page's annotations must not raise."""
    print(f'test_2182() started')
    pdf = pymupdf.open(f'{scriptdir}/resources/test2182.pdf')
    first_page = pdf[0]
    for annotation in first_page.annots():
        print(annotation)
    print(f'test_2182() finished')
| 515 | |
| 516 | |
def test_2246():
    """
    Confirm identical text positions generated by

    * page.insert_text()
    * TextWriter.write_text()

    for each of the 4 possible page rotations, on a page whose MediaBox does
    not start at (0, 0).

    The same text is written at the same position with both methods. The
    text spans are then extracted and must all share a single bbox, which
    shows that TextWriter.write_text() agrees with page.insert_text()
    (assumed correct).
    """
    def bbox_count(rot):
        """Return the number of distinct span bboxes on a page rotated by *rot*.

        The page gets an "awkward" MediaBox that does not start at (0, 0).
        """
        doc = pymupdf.open()
        letter = pymupdf.paper_rect("letter")
        page = doc.new_page(width=letter.width, height=letter.height)
        xref = page.xref

        # Read the MediaBox back from the page object and shift it.
        raw_mediabox = doc.xref_get_key(xref, "MediaBox")[1]
        original_box = pymupdf.Rect([float(v) for v in raw_mediabox[1:-1].split()])
        mbox = original_box + (10, 20, 10, 20)
        cbox = mbox + (10, 10, -10, -10)
        doc.xref_set_key(xref, "MediaBox", "[%g %g %g %g]" % tuple(mbox))
        # NOTE(review): the key "CrobBox" looks like a typo for "CropBox"; as
        # written, the page's CropBox is presumably never changed. Kept as is
        # because the effect of correcting it is unverified - TODO confirm.
        doc.xref_set_key(xref, "CrobBox", "[%g %g %g %g]" % tuple(cbox))

        page.set_rotation(rot)

        # Same text, same position, two different insertion methods.
        page.insert_text((50, 50), "Text inserted at (50,50)")
        writer = pymupdf.TextWriter(page.rect)
        writer.append((50, 50), "Text inserted at (50,50)")
        writer.write_text(page)

        # Identical positions collapse into ONE entry of the set
        # (bboxes are passed through JM_TUPLE3 before comparison).
        distinct_bboxes = {
            pymupdf.Rect(pymupdf.JM_TUPLE3(span["bbox"]))
            for block in page.get_text("dict")["blocks"]
            for line in block["lines"]
            for span in line["spans"]
        }
        return len(distinct_bboxes)  # should be 1!

    # All rotations must yield exactly one distinct bbox.
    assert bbox_count(0) == 1
    assert bbox_count(90) == 1
    assert bbox_count(180) == 1
    assert bbox_count(270) == 1
| 575 | |
| 576 | |
def test_2430():
    """Reading Font.flags many times must not destroy Py_None."""
    helvetica = pymupdf.Font("helv")
    for _repeat in range(1000):
        _ = helvetica.flags
| 582 | |
def test_2692():
    """Render clipped pixmaps from pages and from their display lists."""
    document = pymupdf.Document(f'{scriptdir}/resources/2.pdf')
    for page in document:
        pix = page.get_pixmap(clip=pymupdf.Rect(0, 0, 10, 10))
        display_list = page.get_displaylist(annots=True)

        # With an explicit colorspace ...
        pix = display_list.get_pixmap(
            matrix=pymupdf.Identity,
            colorspace=pymupdf.csRGB,
            alpha=False,
            clip=pymupdf.Rect(0, 0, 10, 10),
        )
        # ... and relying on the default colorspace.
        pix = display_list.get_pixmap(
            matrix=pymupdf.Identity,
            alpha=False,
            clip=pymupdf.Rect(0, 0, 10, 10),
        )
| 600 | |
| 601 | |
def test_2596():
    """Reloading a page after a garbage-collecting save must render identically."""
    if platform.python_implementation() == 'GraalVM':
        print(f'test_2596(): not running on Graal.')
        return

    pdf = pymupdf.Document(f"{scriptdir}/resources/test_2596.pdf")
    page = pdf[0]
    rendered_before = page.get_pixmap()

    # Serializing with garbage collection invalidates cached content for
    # this page; the serialized bytes themselves are not needed.
    _ = pdf.tobytes(garbage=3)

    # Reloading the page empties the cache, so rendering again must deliver
    # the same pixmap.
    page = pdf.reload_page(page)
    rendered_after = page.get_pixmap()
    assert rendered_after.samples == rendered_before.samples

    rebased = hasattr(pymupdf, 'mupdf')
    if pymupdf.mupdf_version_tuple < (1, 26, 6):
        warnings_text = pymupdf.TOOLS.mupdf_warnings()
        assert warnings_text == 'too many indirections (possible indirection cycle involving 24 0 R)'
| 622 | |
| 623 | |
def test_2730():
    """All text extraction variants must yield the same set of characters."""
    pdf = pymupdf.open(f"{scriptdir}/resources/test_2730.pdf")
    first_page = pdf[0]

    plain_chars = set(first_page.get_text())            # plain text extraction
    sorted_chars = set(first_page.get_text(sort=True))  # uses "blocks" extraction
    textbox_chars = set(first_page.get_textbox(first_page.rect))

    assert plain_chars == sorted_chars
    assert plain_chars == textbox_chars
| 633 | |
| 634 | |
def test_2553():
    """Ensure identical output across text extractions.

    Three extraction variants of page 0 must produce the same set of
    characters, and none of them may contain the Unicode replacement
    character 0xFFFD.
    """
    verbose = 0
    doc = pymupdf.open(f"{scriptdir}/resources/test_2553.pdf")
    page = doc[0]

    # extract plain text, build set of all characters
    list1 = page.get_text()
    set1 = set(list1)

    # extract text blocks, build set of all characters
    list2 = page.get_text(sort=True)  # internally uses "blocks"
    set2 = set(list2)

    # extract textbox content, build set of all characters
    list3 = page.get_textbox(page.rect)
    set3 = set(list3)

    def show(l):
        """Return *l* with non-printable / non-ASCII characters shown as hex codes."""
        parts = [f'len={len(l)}\n']
        for c in l:
            cc = ord(c)
            if (cc >= 32 and cc < 127) or c == '\n':
                parts.append(c)
            else:
                # hex() already supplies the "0x" prefix.
                parts.append(f' [{hex(cc)}]')
        return ''.join(parts)

    if verbose:
        print(f'list1:\n{show(list1)}')
        print(f'list2:\n{show(list2)}')
        print(f'list3:\n{show(list3)}')

    # all sets must be equal
    assert set1 == set2
    assert set1 == set3

    # With mupdf later than 1.23.4, this special page contains no invalid
    # Unicodes.
    print(f'Checking no occurrence of 0xFFFD, {pymupdf.mupdf_version_tuple=}.')
    assert chr(0xFFFD) not in set1
| 677 | |
def test_2553_2():
    """Plain text of page 0 must not contain the replacement character 0xFFFD."""
    pdf = pymupdf.open(f"{scriptdir}/resources/test_2553-2.pdf")
    first_page = pdf[0]

    plain_text = first_page.get_text()
    replacement_char = chr(0xfffd)
    assert replacement_char not in plain_text
| 685 | |
def test_2635():
    """Cleaning a page's contents must not change how the page renders."""
    pdf = pymupdf.open(f"{scriptdir}/resources/test_2635.pdf")
    first_page = pdf[0]

    rendered_before = first_page.get_pixmap()
    first_page.clean_contents()
    rendered_after = first_page.get_pixmap()

    assert rendered_before.samples == rendered_after.samples
| 695 | |
| 696 | |
def test_resolve_names():
    """Test PDF name resolution.

    The names resolved from cython.pdf must equal the reference result
    stored in cython.pickle.
    """
    # guard against wrong PyMuPDF architecture version
    if not hasattr(pymupdf.Document, "resolve_names"):
        print("PyMuPDF version does not support resolving PDF names")
        return
    # Use a context manager so the reference file is closed again.
    # The pickle is a fixture shipped with the test suite, not external input.
    with open(f"{scriptdir}/resources/cython.pickle", "rb") as pickle_in:
        old_names = pickle.load(pickle_in)
    doc = pymupdf.open(f"{scriptdir}/resources/cython.pdf")
    new_names = doc.resolve_names()
    assert new_names == old_names
| 708 | |
def test_2777():
    """Smoke test: reading the mediabox width of a new page must not raise."""
    pdf = pymupdf.Document()
    blank_page = pdf.new_page()
    width = blank_page.mediabox.width
    print(width)
| 713 | |
def test_2710():
    """Check text block, CropBox and MediaBox of a page with a shifted CropBox."""
    doc = pymupdf.open(f'{scriptdir}/resources/test_2710.pdf')
    page = doc.load_page(0)

    print(f'test_2710(): {page.cropbox=}')
    print(f'test_2710(): {page.mediabox=}')
    print(f'test_2710(): {page.rect=}')

    def assert_rects_approx_eq(a, b):
        """Assert that rects *a* and *b* agree within 0.001 at two opposite corners."""
        corner_pairs = (
            (a.bottom_left, b.bottom_left),
            (a.top_right, b.top_right),
        )
        nearly_identical = all(
            abs(p.x - q.x) < 0.001 and abs(p.y - q.y) < 0.001
            for p, q in corner_pairs
        )
        assert nearly_identical, f'Not nearly identical: {a=} {b=}'

    blocks = page.get_text('blocks')
    print(f'test_2710(): {blocks=}')
    assert len(blocks) == 2

    # The second block is the text sitting at the left page border.
    second_block = blocks[1]
    rect = pymupdf.Rect(second_block[:4])
    text = second_block[4]
    print(f'test_2710(): {rect=}')
    print(f'test_2710(): {text=}')
    assert text == 'Text at left page border\n'

    assert_rects_approx_eq(page.cropbox, pymupdf.Rect(30.0, 30.0, 565.3200073242188, 811.9199829101562))
    assert_rects_approx_eq(page.mediabox, pymupdf.Rect(0.0, 0.0, 595.3200073242188, 841.9199829101562))
    print(f'test_2710(): {pymupdf.mupdf_version_tuple=}')
    # 2023-11-05: Currently broken in mupdf master.
    print(f'test_2710(): Not Checking page.rect and rect.')

    rebased = hasattr(pymupdf, 'mupdf')
    if rebased:
        warnings_text = pymupdf.TOOLS.mupdf_warnings()
        expected_warnings = '\n'.join((
            "syntax error: cannot find ExtGState resource 'GS7'",
            "syntax error: cannot find ExtGState resource 'GS8'",
            "encountered syntax errors; page may not be correct",
        ))
        assert warnings_text == expected_warnings
| 754 | |
| 755 | |
def test_2736():
    """Check CropBox handling when the MediaBox has negative coordinates."""
    pdf = pymupdf.open()
    page = pdf.new_page()

    # Fake a MediaBox with negative origin for demo purposes.
    pdf.xref_set_key(page.xref, "MediaBox", "[-30 -20 595 842]")

    assert page.cropbox == pymupdf.Rect(-30, 0, 595, 862)
    assert page.rect == pymupdf.Rect(0, 0, 625, 862)

    # Shift the CropBox by (10, 10). Note that 10 must be subtracted from
    # 862 to achieve this, because y0 must never be negative.
    page.set_cropbox(pymupdf.Rect(-20, 0, 595, 852))

    # Compare with the CropBox as stored in the page definition.
    stored_cropbox = pdf.xref_get_key(page.xref, "CropBox")[1]
    assert stored_cropbox == "[-20 -10 595 842]"
    assert page.rect == pymupdf.Rect(0, 0, 615, 852)

    # A CropBox reaching outside the MediaBox must be rejected.
    raised = False
    message = ""
    try:
        page.set_cropbox((-35, -10, 595, 842))
    except Exception as exc:
        raised = True
        message = str(exc)
    assert raised == True
    assert message == "CropBox not in MediaBox"
| 785 | |
| 786 | |
def test_subset_fonts():
    """Font subsetting must leave a "+Ubuntu#20Regular" object in the PDF."""
    if not hasattr(pymupdf, "mupdf"):
        print("Not testing 'test_subset_fonts' in classic.")
        return

    # Build a one-page document whose text uses the Ubuntu font via CSS.
    archive = pymupdf.Archive()
    css = pymupdf.css_for_pymupdf_font("ubuntu", archive=archive)
    css += "* {font-family: ubuntu;}"
    pdf = pymupdf.open()
    page = pdf.new_page()
    page.insert_htmlbox(page.rect, "Just some arbitrary text.", css=css, archive=archive)

    pdf.subset_fonts(verbose=True)

    # Scan the object definitions until the subset font name shows up.
    found = any(
        "+Ubuntu#20Regular" in pdf.xref_object(xref)
        for xref in range(1, pdf.xref_length())
    )
    assert found is True
| 806 | |
| 807 | |
def test_2957_1():
    """Text following a redaction must keep its coordinates."""
    marker = "6e9f73dfb4384a2b8af6ebba"  # string that must not move

    def bottom_edge(rect):
        """Sort key: order rectangles vertically by their bottom edge."""
        return rect.y1

    # The test file already contains the redaction annotations.
    pdf = pymupdf.open(os.path.join(scriptdir, "resources", "test_2957_1.pdf"))
    page = pdf[0]

    hits_before = sorted(page.search_for(marker), key=bottom_edge)
    assert len(hits_before) == 2  # the marker must occur exactly twice
    page.apply_redactions()

    # Reload the page to finalize the updates.
    page = pdf.reload_page(page)

    hits_after = sorted(page.search_for(marker), key=bottom_edge)

    assert page.first_annot is None  # the annotations must be gone
    for index in (0, 1):
        # Compare integer rectangles to tolerate rounding errors.
        assert hits_before[index].irect == hits_after[index].irect
| 832 | |
| 833 | |
def test_2957_2():
    """Redacted text must not change positions of remaining text.

    The redaction in the test file removes the word "longer"; every other
    word must keep its (integer-rounded) bounding box.
    """
    doc = pymupdf.open(os.path.join(scriptdir, "resources", "test_2957_2.pdf"))
    page = doc[0]
    before = page.get_text("words")  # all words before redacting
    page.apply_redactions()
    after = page.get_text("words")  # all words after redacting

    assert len(after) == len(before) - 1  # must be one word less
    assert before[3][4] == "longer"  # just confirm test file is correct one
    del before[3]  # both lists now describe the same words

    for idx, word_after in enumerate(after):
        bbox_after = pymupdf.Rect(word_after[:4]).irect
        bbox_before = pymupdf.Rect(before[idx][:4]).irect
        assert bbox_before == bbox_after  # must be same coordinates
| 850 | |
| 851 | |
def test_707560():
    """https://bugs.ghostscript.com/show_bug.cgi?id=707560
    Ensure that redactions also remove characters with an empty width bbox.
    """
    # Greetings in many scripts; the resulting text is meant to contain
    # characters with an empty bbox.
    greetings = (
        "Hello, World!",  # English
        "Hallo, Welt!",  # German
        "سلام دنیا!",  # Persian
        "வணக்கம், உலகம்!",  # Tamil
        "สวัสดีชาวโลก!",  # Thai
        "Привіт Світ!",  # Ukrainian
        "שלום עולם!",  # Hebrew
        "ওহে বিশ্ব!",  # Bengali
        "你好世界!",  # Chinese
        "こんにちは世界!",  # Japanese
        "안녕하세요, 월드!",  # Korean
        "नमस्कार, विश्व !",  # Sanskrit
        "हैलो वर्ल्ड!",  # Hindi
    )
    story = pymupdf.Story(" ... ".join(greetings))
    where = (50, 50, 400, 500)

    # Lay the story out into an in-memory PDF, one page per placement.
    bio = io.BytesIO()
    writer = pymupdf.DocumentWriter(bio)
    while True:
        dev = writer.begin_page(pymupdf.paper_rect("a4"))
        more, _ = story.place(where)
        story.draw(dev)
        writer.end_page()
        if not more:
            break
    writer.close()

    doc = pymupdf.open("pdf", bio)
    page = doc[0]
    assert page.get_text(), "Unexpected: test page has no text."

    # Redact the complete page: no text may survive.
    page.add_redact_annot(page.rect)
    page.apply_redactions()
    assert not page.get_text(), "Unexpected: text not fully redacted."
| 892 | |
| 893 | |
def test_3070():
    """Change the URI of the first link on the first page and save the PDF."""
    path_in = os.path.abspath(f'{__file__}/../../tests/resources/test_3070.pdf')
    path_out = os.path.abspath(f'{__file__}/../../tests/test_3070_out.pdf')
    with pymupdf.open(path_in) as pdf:
        first_link = pdf[0].get_links()[0]
        first_link['uri'] = "https://www.ddg.gg"
        pdf[0].update_link(first_link)
        pdf.save(path_out)
| 900 | |
def test_bboxlog_2885():
    """Page.get_bboxlog() must emit the expected MuPDF warning.

    The call is made once with default arguments and once with
    `layers=True`; each time the collected warnings must consist of exactly
    the expected text.
    """
    expected_warning = 'invalid marked content and clip nesting'
    doc = pymupdf.open(os.path.abspath(f'{__file__}/../../tests/resources/test_2885.pdf'))
    page = doc[0]

    for kwargs in (dict(), dict(layers=True)):
        page.get_bboxlog(**kwargs)
        assert pymupdf.TOOLS.mupdf_warnings() == expected_warning
| 912 | |
def test_3081():
    '''
    Check Document.close() closes file handles, even if a Page instance exists.

    The test probes which file descriptor number the OS hands out next at
    several points in time (before opening, while the document is open, after
    closing it, after a failed page access) and compares these numbers with
    two reference values obtained up front.
    '''
    path1 = os.path.abspath(f'{__file__}/../../tests/resources/1.pdf')
    path2 = os.path.abspath(f'{__file__}/../../tests/test_3081-2.pdf')

    # True for the rebased implementation, False for classic.
    rebased = hasattr(pymupdf, 'mupdf')

    # NOTE(review): `traceback` is only referenced by commented-out code below.
    import shutil
    import sys
    import traceback
    # Work on a copy of the resource file.
    shutil.copy2(path1, path2)

    # Find next two available fds.
    # Both are opened before either is closed, so that two distinct numbers
    # are obtained.
    next_fd_1 = os.open(path2, os.O_RDONLY)
    next_fd_2 = os.open(path2, os.O_RDONLY)
    os.close(next_fd_1)
    os.close(next_fd_2)

    def next_fd():
        # Open and immediately close the file, returning the fd number that
        # was assigned to it.
        fd = os.open(path2, os.O_RDONLY)
        os.close(fd)
        return fd

    fd1 = next_fd()     # Probe before the document is opened.
    document = pymupdf.open(path2)
    page = document[0]
    fd2 = next_fd()     # Probe while document and page exist.
    document.close()
    if rebased:
        # After close() neither wrapper may reference an underlying object.
        assert document.this is None
        assert page.this is None
    try:
        # Using the closed document must fail.
        document.page_count()
    except Exception as e:
        print(f'Received expected exception: {e}')
        #traceback.print_exc(file=sys.stdout)
        assert str(e) == 'document closed'
    else:
        assert 0, 'Did not receive expected exception.'
    fd3 = next_fd()     # Probe after the document was closed.
    try:
        # Using a page of the closed document must fail too.
        page.bound()
    except Exception as e:
        print(f'Received expected exception: {e}')
        #traceback.print_exc(file=sys.stdout)
        if rebased:
            assert str(e) == 'page is None'
        else:
            assert str(e) == 'orphaned object: parent is None'
    else:
        assert 0, 'Did not receive expected exception.'
    page = None
    fd4 = next_fd()     # Probe after the page reference was dropped.
    print(f'{next_fd_1=} {next_fd_2=}')
    print(f'{fd1=} {fd2=} {fd3=} {fd4=}')
    print(f'{document=}')
    # NOTE(review): the comparisons below presumably rely on the OS always
    # assigning the lowest free fd number - confirm for non-POSIX platforms.
    assert fd1 == next_fd_1
    assert fd2 == next_fd_2 # Checks document only uses one fd.
    assert fd3 == next_fd_1 # Checks no leaked fds after document close.
    assert fd4 == next_fd_1 # Checks no leaked fds after failed page access.
| 975 | |
def test_xml():
    """Read the XML metadata of a PDF; only checks that no exception occurs."""
    pdf_path = os.path.abspath(f'{__file__}/../../tests/resources/2.pdf')
    with pymupdf.open(pdf_path) as pdf:
        pdf.get_xml_metadata()
| 980 | |
def test_3112_set_xml_metadata():
    """Set XML metadata on a new, empty document; must not raise."""
    new_doc = pymupdf.Document()
    new_doc.set_xml_metadata('hello world')
| 984 | |
def test_archive_3126():
    """Create a pymupdf.Archive from a pathlib.Path pointing to a directory."""
    if not hasattr(pymupdf, 'mupdf'):
        print(f'Not running because known to fail with classic.')
        return
    resources_dir = pathlib.Path(os.path.abspath(f'{__file__}/../../tests/resources'))
    pymupdf.Archive(resources_dir)
| 992 | |
def test_3140():
    """Check that no file descriptor stays in use after filling a table.

    A copy of a resource PDF gets a 5x3 table of HTML boxes on its first
    page, fonts are subsetted and the result is saved. The fd number handed
    out by the OS must be the same before opening and after closing the
    document.
    """
    if not hasattr(pymupdf, 'mupdf'):
        print(f'Not running test_3140 on classic, because Page.insert_htmlbox() not available.')
        return
    import shutil

    path = os.path.abspath(f'{__file__}/../../tests/resources/2.pdf')
    oldfile = os.path.abspath(f'{__file__}/../../tests/test_3140_old.pdf')
    newfile = os.path.abspath(f'{__file__}/../../tests/test_3140_new.pdf')
    shutil.copy2(path, oldfile)

    def next_fd():
        # Open and close the resource file, returning the fd it was given.
        fd = os.open(path, os.O_RDONLY)
        os.close(fd)
        return fd

    n_rows, n_cols = 5, 3
    fd1 = next_fd()
    with pymupdf.open(oldfile) as doc:
        page = doc[0]
        cells = pymupdf.make_table(
            pymupdf.Rect(130, 400, 430, 600),
            cols=n_cols,
            rows=n_rows,
        )
        shape = page.new_shape()
        for row in range(n_rows):
            for col in range(n_cols):
                number = row * 3 + col + 1
                html = f"<b>Ques #{number}: </b><br><b>Ans:</b>"  # codespell:ignore
                cell = cells[row][col]
                shape.draw_rect(cell)
                page.insert_htmlbox(cell, html, css='', scale_low=0)
        shape.finish(width=2.5, color=pymupdf.pdfcolor["blue"])
        shape.commit()  # write all stuff to the page
        doc.subset_fonts()
        doc.ez_save(newfile)
    fd2 = next_fd()
    assert fd2 == fd1, f'{fd1=} {fd2=}'
    os.remove(oldfile)
| 1027 | |
def test_cli():
    """Run `pymupdf -h`; the command must exit successfully."""
    if hasattr(pymupdf, 'mupdf'):
        import subprocess
        subprocess.run(f'pymupdf -h', shell=1, check=1)
    else:
        print('test_cli(): Not running on classic because of fitz_old.')
| 1034 | |
| 1035 | |
def check_lines(expected_regexes, actual):
    '''
    Checks lines in <actual> match regexes in <expected_regexes>.

    Args:
        expected_regexes:
            Either a string, which is split into one regex per line, or a
            sequence of regexes. Items that are `None` are ignored. Each
            regex must match an entire line. If the last item is non-empty,
            a trailing empty line is expected in addition.
        actual:
            Either a string, which is split into lines, or a sequence of
            lines.

    Neither argument is modified.

    Raises:
        AssertionError: if a line does not match its regex, or if the number
        of lines differs from the number of regexes.
    '''
    print(f'check_lines():', flush=1)
    print(f'{expected_regexes=}', flush=1)
    print(f'{actual=}', flush=1)
    def str_to_list(s):
        if isinstance(s, str):
            return s.split('\n') if s else list()
        # Always work on a private copy: we may append to the result, which
        # must not modify the caller's sequence (and must work for tuples).
        return list(s)
    expected_regexes = str_to_list(expected_regexes)
    actual = str_to_list(actual)
    if expected_regexes and expected_regexes[-1]:
        expected_regexes.append('') # Always expect a trailing empty line.
    # Remove `None` regexes and make all regexes match entire lines. The
    # non-capturing group makes the anchors apply to the whole regex, even if
    # it contains a top-level alternation such as `a|b`.
    expected_regexes = [f'^(?:{i})$' for i in expected_regexes if i is not None]
    print(f'{expected_regexes=}', flush=1)
    for expected_regex_line, actual_line in zip(expected_regexes, actual):
        print(f' {expected_regex_line=}', flush=1)
        print(f' {actual_line=}', flush=1)
        assert re.match(expected_regex_line, actual_line), \
                f'line does not match: {expected_regex_line=} {actual_line=}.'
    assert len(expected_regexes) == len(actual), \
            f'expected/actual lines mismatch: {len(expected_regexes)=} {len(actual)=}.'
| 1060 | |
def test_cli_out():
    '''
    Check redirection of messages and log diagnostics with environment
    variables PYMUPDF_LOG and PYMUPDF_MESSAGE.
    '''
    if not hasattr(pymupdf, 'mupdf'):
        print('test_cli(): Not running on classic because of fitz_old.')
        return
    import platform
    import re
    import subprocess

    # Regex for an extra log line that is only expected if PYMUPDF_USE_EXTRA
    # is '0'. Otherwise None, which check_lines() ignores.
    if os.environ.get('PYMUPDF_USE_EXTRA') == '0':
        log_prefix = f'.+Using non-default setting from PYMUPDF_USE_EXTRA: \'0\''
    else:
        log_prefix = None

    # Regexes for the lines written by `pymupdf internal`.
    message_regex = 'This is from PyMuPDF message[(][)][.]'
    log_regex = '.+This is from PyMuPDF log[(][)].'
    log_regex_file = '.+This is from PyMuPDF log[(][)][.]'

    def check(
            expect_out,
            expect_err,
            message=None,
            log=None,
            verbose=0,
            ):
        '''
        Sets PYMUPDF_MESSAGE to `message` and PYMUPDF_LOG to `log`, runs
        `pymupdf internal`, and checks lines stdout and stderr match regexes in
        `expect_out` and `expect_err`. Note that we enclose regexes in `^...$`.
        '''
        overrides = dict()
        if log:
            overrides['PYMUPDF_LOG'] = log
        if message:
            overrides['PYMUPDF_MESSAGE'] = message
        env = os.environ | overrides
        print(f'Running with {env=}: pymupdf internal', flush=1)
        cp = subprocess.run(f'pymupdf internal', shell=1, check=1, capture_output=1, env=env, text=True)

        if verbose:
            for label, captured in (('stdout', cp.stdout), ('stderr', cp.stderr)):
                sys.stdout.write(f'{label}:\n{textwrap.indent(captured, " ")}')
        check_lines(expect_out, cp.stdout)
        check_lines(expect_err, cp.stderr)

    def read(path):
        with open(path) as f:
            return f.read()

    print(f'Checking default, all output to stdout.')
    check([log_prefix, message_regex, log_regex], '')

    if platform.system() != 'Windows':
        print(f'Checking redirection of everything to /dev/null.')
        check('', '', 'path:/dev/null', 'path:/dev/null')

    print(f'Checking redirection to files.')
    path_out = os.path.abspath(f'{__file__}/../../tests/test_cli_out.out')
    path_err = os.path.abspath(f'{__file__}/../../tests/test_cli_out.err')
    check('', '', f'path:{path_out}', f'path:{path_err}')
    out = read(path_out)
    err = read(path_err)
    check_lines([message_regex], out)
    check_lines([log_prefix, log_regex_file], err)

    print(f'Checking redirection to fds.')
    check([message_regex], [log_prefix, log_regex], 'fd:1', 'fd:2')
| 1147 | |
| 1148 | |
def test_use_python_logging():
    '''
    Checks pymupdf.use_python_logging().

    Each scenario writes a small script to a file, runs it with the current
    Python interpreter in a child process, and compares the child's stdout
    and stderr line by line with lists of regexes using check_lines().
    '''
    # Regex for an extra log line that is only expected if PYMUPDF_USE_EXTRA
    # is '0'. Otherwise None, which check_lines() ignores.
    log_prefix = None
    if os.environ.get('PYMUPDF_USE_EXTRA') == '0':
        log_prefix = f'.+Using non-default setting from PYMUPDF_USE_EXTRA: \'0\''

    if os.path.basename(__file__).startswith(f'test_fitz_'):
        # Do nothing, because command `pymupdf` outputs diagnostics containing
        # `pymupdf` which are not renamed to `fitz`, which breaks our checking.
        print(f'Not testing with fitz alias.')
        return

    def check(
            code,
            regexes_stdout,
            regexes_stderr,
            env = None,
            ):
        '''
        Runs `code` (after textwrap.dedent()) as a script in a child process
        and checks its stdout/stderr against `regexes_stdout` and
        `regexes_stderr`. If `env` is given, its items are added to a copy of
        the current environment for the child process.
        '''
        code = textwrap.dedent(code)
        path = os.path.abspath(f'{__file__}/../../tests/resources_test_logging.py')
        with open(path, 'w') as f:
            f.write(code)
        # `command` is used for diagnostics only. The child is started from
        # an argument list without a shell, so paths that contain spaces or
        # shell metacharacters need no quoting.
        command = f'{sys.executable} {path}'
        if env:
            print(f'{env=}.')
            env = os.environ | env
        print(f'Running: {command}', flush=1)
        try:
            cp = subprocess.run([sys.executable, path], check=1, capture_output=1, text=True, env=env)
        except subprocess.CalledProcessError as e:
            # Only CalledProcessError carries the captured output. Other
            # exceptions (e.g. OSError) propagate unchanged.
            print(f'Command failed: {command}.', flush=1)
            print(f'Stdout\n{textwrap.indent(e.stdout, " ")}', flush=1)
            print(f'Stderr\n{textwrap.indent(e.stderr, " ")}', flush=1)
            raise
        check_lines(regexes_stdout, cp.stdout)
        check_lines(regexes_stderr, cp.stderr)

    print(f'## Basic use of `logging` sends output to stderr instead of default stdout.')
    check(
            '''
            import pymupdf
            pymupdf.message('this is pymupdf.message()')
            pymupdf.log('this is pymupdf.log()')
            pymupdf.set_messages(pylogging=1)
            pymupdf.set_log(pylogging=1)
            pymupdf.message('this is pymupdf.message() 2')
            pymupdf.log('this is pymupdf.log() 2')
            ''',
            [
                log_prefix,
                'this is pymupdf.message[(][)]',
                '.+this is pymupdf.log[(][)]',
            ],
            [
                'this is pymupdf.message[(][)] 2',
                '.+this is pymupdf.log[(][)] 2',
            ],
            )

    print(f'## Calling logging.basicConfig() makes logging output contain <LEVEL>:<name> prefixes.')
    check(
            '''
            import pymupdf

            import logging
            logging.basicConfig()
            pymupdf.set_messages(pylogging=1)
            pymupdf.set_log(pylogging=1)

            pymupdf.message('this is pymupdf.message()')
            pymupdf.log('this is pymupdf.log()')
            ''',
            [
                log_prefix,
            ],
            [
                'WARNING:pymupdf:this is pymupdf.message[(][)]',
                'WARNING:pymupdf:.+this is pymupdf.log[(][)]',
            ],
            )

    print(f'## Setting PYMUPDF_USE_PYTHON_LOGGING=1 makes PyMuPDF use logging on startup.')
    check(
            '''
            import pymupdf
            pymupdf.message('this is pymupdf.message()')
            pymupdf.log('this is pymupdf.log()')
            ''',
            '',
            [
                log_prefix,
                'this is pymupdf.message[(][)]',
                '.+this is pymupdf.log[(][)]',
            ],
            env = dict(
                    PYMUPDF_MESSAGE='logging:',
                    PYMUPDF_LOG='logging:',
                    ),
            )

    print(f'## Pass explicit logger to pymupdf.use_python_logging() with logging.basicConfig().')
    check(
            '''
            import pymupdf

            import logging
            logging.basicConfig()

            logger = logging.getLogger('foo')
            pymupdf.set_messages(pylogging_logger=logger, pylogging_level=logging.WARNING)
            pymupdf.set_log(pylogging_logger=logger, pylogging_level=logging.ERROR)

            pymupdf.message('this is pymupdf.message()')
            pymupdf.log('this is pymupdf.log()')
            ''',
            [
                log_prefix,
            ],
            [
                'WARNING:foo:this is pymupdf.message[(][)]',
                'ERROR:foo:.+this is pymupdf.log[(][)]',
            ],
            )

    print(f'## Check pymupdf.set_messages() pylogging_level args.')
    check(
            '''
            import pymupdf

            import logging
            logging.basicConfig(level=logging.DEBUG)
            logger = logging.getLogger('pymupdf')

            pymupdf.set_messages(pylogging_level=logging.CRITICAL)
            pymupdf.set_log(pylogging_level=logging.INFO)

            pymupdf.message('this is pymupdf.message()')
            pymupdf.log('this is pymupdf.log()')
            ''',
            [
                log_prefix,
            ],
            [
                'CRITICAL:pymupdf:this is pymupdf.message[(][)]',
                'INFO:pymupdf:.+this is pymupdf.log[(][)]',
            ],
            )

    print(f'## Check messages() with sys.stdout=None.')
    check(
            '''
            import sys
            sys.stdout = None
            import pymupdf

            pymupdf.message('this is pymupdf.message()')
            pymupdf.log('this is pymupdf.log()')
            ''',
            [],
            [],
            )
| 1312 | |
| 1313 | |
def relpath(path, start=None):
    '''
    Like os.path.relpath(), but does not fail on Windows if `path` and
    `start` are on different drives; the absolute path is returned in that
    case.
    '''
    try:
        result = os.path.relpath(path, start)
    except ValueError:
        # os.path.relpath() fails if trying to change drives, which is only
        # expected to happen on Windows.
        assert platform.system() == 'Windows'
        result = os.path.abspath(path)
    return result
| 1325 | |
| 1326 | |
def test_open():
    '''
    Checks pymupdf.open() with valid input and with various kinds of invalid
    input (bad argument types, missing file, directory, empty file, unknown
    file format, empty stream). For invalid input, the type of the raised
    exception and the text of its formatted traceback are checked.
    '''

    if not hasattr(pymupdf, 'mupdf'):
        print('test_open(): not running on classic.')
        return

    import re
    import textwrap
    import traceback

    resources = relpath(os.path.abspath(f'{__file__}/../../tests/resources'))

    # We convert all strings to use `/` instead of os.sep, which avoids
    # problems with regex's on windows.
    resources = resources.replace(os.sep, '/')

    def check(filename=None, stream=None, filetype=None, exception=None):
        '''
        Checks we receive expected exception if specified.

        `exception` is either false or a pair `(etype, eregex)`. `eregex` is
        a regex, or a tuple/list of regexes that must be found in this order
        in the formatted exception text. If `exception` is false, the
        document is opened and returned.
        '''
        if isinstance(filename, str):
            filename = filename.replace(os.sep, '/')
        if exception:
            etype, eregex = exception
            if isinstance(eregex, (tuple, list)):
                # Treat as sequence of regexes to look for.
                eregex = '.*'.join(eregex)
            try:
                pymupdf.open(filename=filename, stream=stream, filetype=filetype)
            except etype as e:
                # Format the exception currently being handled, without
                # stack trace entries (limit=0).
                text = traceback.format_exc(limit=0)
                text = text.replace(os.sep, '/')
                # The predicate makes textwrap.indent() indent every line.
                text = textwrap.indent(text, ' ', lambda line: 1)
                # re.DOTALL lets `.*` between joined regexes span lines.
                assert re.search(eregex, text, re.DOTALL), \
                        f'Incorrect exception text, expected {eregex=}, received:\n{text}'
                print(f'Received expected exception for {filename=} {stream=} {filetype=}:\n{text}')
            except Exception as e:
                assert 0, \
                        f'Incorrect exception, expected {etype}, received {type(e)=}.'
            else:
                assert 0, f'Did not received exception, expected {etype=}. {filename=} {stream=} {filetype=} {exception=}'
        else:
            document = pymupdf.open(filename=filename, stream=stream, filetype=filetype)
            return document

    # Valid files must open without an exception.
    check(f'{resources}/1.pdf')

    check(f'{resources}/Bezier.epub')

    # Filename of a wrong type.
    path = 1234
    etype = TypeError
    eregex = re.escape(f'bad filename: type(filename)=<class \'int\'> filename={path}.')
    check(path, exception=(etype, eregex))

    # File that does not exist.
    path = 'test_open-this-file-will-not-exist'
    etype = pymupdf.FileNotFoundError
    eregex = f'no such file: \'{path}\''
    check(path, exception=(etype, eregex))

    # A directory instead of a file.
    path = resources
    etype = pymupdf.FileDataError
    eregex = re.escape(f'\'{path}\' is no file')
    check(path, exception=(etype, eregex))

    # An empty file, created here (it is not removed afterwards).
    path = relpath(os.path.abspath(f'{resources}/../test_open_empty'))
    path = path.replace(os.sep, '/')
    with open(path, 'w') as f:
        pass
    etype = pymupdf.EmptyFileError
    eregex = re.escape(f'Cannot open empty file: filename={path!r}.')
    check(path, exception=(etype, eregex))

    # A PDF file opened with a filetype that does not match its content.
    path = f'{resources}/1.pdf'
    filetype = 'xps'
    etype = pymupdf.FileDataError
    # 2023-12-12: On OpenBSD, for some reason the SWIG catch code only catches
    # the exception as FzErrorBase.
    etype2 = 'FzErrorBase' if platform.system() == 'OpenBSD' else 'FzErrorFormat'
    eregex = (
            # With a sysinstall with separate MuPDF install, we get
            # `mupdf.FzErrorFormat` instead of `pymupdf.mupdf.FzErrorFormat`. So
            # we just search for the former.
            re.escape(f'mupdf.{etype2}: code=7: cannot recognize zip archive'),
            re.escape(f'pymupdf.FileDataError: Failed to open file {path!r} as type {filetype!r}.'),
            )
    # NOTE(review): `etype` and `eregex` from above are not passed here, so
    # this call only requires that opening succeeds - presumably deliberate;
    # confirm.
    check(path, filetype=filetype, exception=None)

    # A file for which no document handler exists.
    path = f'{resources}/chinese-tables.pickle'
    etype = pymupdf.FileDataError
    etype2 = 'FzErrorBase' if platform.system() == 'OpenBSD' else 'FzErrorUnsupported'
    etext = (
            re.escape(f'mupdf.{etype2}: code=6: cannot find document handler for file: {path}'),
            re.escape(f'pymupdf.FileDataError: Failed to open file {path!r}.'),
            )
    check(path, exception=(etype, etext))

    # Stream of a wrong type.
    stream = 123
    etype = TypeError
    etext = re.escape('bad stream: type(stream)=<class \'int\'>.')
    check(stream=stream, exception=(etype, etext))

    # Empty stream.
    check(stream=b'', exception=(pymupdf.EmptyFileError, re.escape('Cannot open empty stream.')))
| 1429 | |
| 1430 | |
def test_open2():
    '''
    Checks behaviour of fz_open_document() and fz_open_document_with_stream()
    with different filenames/magic values.

    Each input file tests/resources/test_open2.* is opened once per known
    extension: as a file copy named `foo<ext>`, and as an in-memory stream
    with `<ext>` as magic value. The detected document formats (or
    '[error]') are written to an HTML table and a JSON file. For MuPDF 1.26
    and later they are also compared with
    tests/resources/test_open2_expected.json.
    '''
    if platform.system() == 'Windows':
        print(f'test_open2(): not running on Windows because `git ls-files` known fail on Github Windows runners.')
        return

    root = os.path.normpath(f'{__file__}/../..')
    root = relpath(root)

    # Find tests/resources/test_open2.* input files/streams. We calculate
    # paths relative to the PyMuPDF checkout directory <root>, to allow use
    # of tests/resources/test_open2_expected.json regardless of the actual
    # checkout directory.
    print()
    sys.path.append(root)
    try:
        import pipcl
    finally:
        # Remove the entry that we appended above, i.e. the last one.
        # Deleting sys.path[0] would remove an unrelated entry and leave
        # <root> on the path.
        del sys.path[-1]
    paths = pipcl.git_items(f'{root}/tests/resources')
    paths = fnmatch.filter(paths, f'test_open2.*')
    paths = [f'tests/resources/{i}' for i in paths]

    # Get list of extensions of input files.
    extensions = set()
    extensions.add('.txt')
    extensions.add('')
    for path in paths:
        _, ext = os.path.splitext(path)
        extensions.add(ext)
    extensions = sorted(extensions)

    def get_result(e, document):
        '''
        Return fz_lookup_metadata(document, 'format') or '[error]' if
        exception `e` is set. Returns '' if the lookup itself fails.
        '''
        if e:
            return f'[error]'
        else:
            try:
                return pymupdf.mupdf.fz_lookup_metadata2(document, 'format')
            except Exception:
                return ''

    def dict_set_path(dict_, *items):
        # Sets dict_[items[0]][items[1]]...[items[-2]] = items[-1], creating
        # intermediate dicts as required.
        for item in items[:-2]:
            dict_ = dict_.setdefault(item, dict())
        dict_[items[-2]] = items[-1]

    # Maps path => extension => 'file'/'stream' => result text.
    results = dict()

    # Prevent warnings while we are running.
    _g_out_message = pymupdf._g_out_message
    pymupdf._g_out_message = None
    try:
        for path in paths:
            print(path)
            for ext in extensions:
                path2 = f'{root}/foo{ext}'
                path3 = shutil.copy2(f'{root}/{path}', path2)
                assert(path3 == path2)

                # Test fz_open_document().
                e = None
                document = None
                try:
                    document = pymupdf.mupdf.fz_open_document(path2)
                except Exception as ee:
                    e = ee
                # Fetch accumulated warnings; the text itself is not needed.
                wt = pymupdf.TOOLS.mupdf_warnings()
                text = get_result(e, document)
                print(f' fz_open_document({path2}) => {text}')
                dict_set_path(results, path, ext, 'file', text)

                # Test fz_open_document_with_stream().
                e = None
                document = None
                with open(f'{root}/{path}', 'rb') as f:
                    data = f.read()
                stream = pymupdf.mupdf.fz_open_memory(pymupdf.mupdf.python_buffer_data(data), len(data))
                try:
                    document = pymupdf.mupdf.fz_open_document_with_stream(ext, stream)
                except Exception as ee:
                    e = ee
                wt = pymupdf.TOOLS.mupdf_warnings()
                text = get_result(e, document)
                print(f' fz_open_document_with_stream(magic={ext!r}) => {text}')
                dict_set_path(results, path, ext, 'stream', text)

    finally:
        pymupdf._g_out_message = _g_out_message

    # Create html table.
    path_html = os.path.normpath(f'{__file__}/../../tests/test_open2.html')
    with open(path_html, 'w') as f:
        f.write(f'<html>\n')
        f.write(f'<body>\n')
        f.write(f'<p>{time.strftime("%F-%T")}\n')
        f.write(f'<table border="1" style="border-collapse:collapse" cellpadding="4">\n')
        f.write(f'<tr><td></td><th colspan="{len(extensions)}">Extension/magic')
        f.write(f'<tr><th style="border-bottom: 4px solid black; border-right: 4px solid black;">Data file</th>')
        for ext in extensions:
            f.write(f'<th style="border-bottom: 4px solid black;">{ext}</th>')
        f.write('\n')
        for path in sorted(results.keys()):
            _, ext = os.path.splitext(path)
            f.write(f'<tr><th style="border-right: 4px solid black;">{os.path.basename(path)}</th>')
            for ext2 in sorted(results[path].keys()):
                text_file = results[path][ext2]['file']
                text_stream = results[path][ext2]['stream']
                # Use bold text where the extension/magic is the file's own
                # extension.
                b1, b2 = ('<b>', '</b>') if ext2==ext else ('', '')
                if text_file == text_stream:
                    if text_file == '[error]':
                        f.write(f'<td><div style="color: #808080;">{b1}{text_file}{b2}</div></td>')
                    else:
                        f.write(f'<td>{b1}{text_file}{b2}</td>')
                else:
                    f.write(f'<td>file: {b1}{text_file}{b2}<br>')
                    f.write(f'stream: {b1}{text_stream}{b2}</td>')
            f.write('</tr>\n')
        f.write(f'</table>\n')
        f.write(f'</body>\n')
        f.write(f'</html>\n')
    print(f'Have created: {path_html}')

    path_out = os.path.normpath(f'{__file__}/../../tests/test_open2.json')
    with open(path_out, 'w') as f:
        json.dump(results, f, indent=4, sort_keys=1)

    if pymupdf.mupdf_version_tuple >= (1, 26):
        with open(os.path.normpath(f'{__file__}/../../tests/resources/test_open2_expected.json')) as f:
            results_expected = json.load(f)
        if results != results_expected:
            print(f'results != results_expected:')
            def show(r, name):
                text = json.dumps(r, indent=4, sort_keys=1)
                print(f'{name}:')
                print(textwrap.indent(text, ' '))
            show(results_expected, 'results_expected')
            show(results, 'results')
            assert 0
| 1577 | |
| 1578 | |
def test_533():
    '''
    Iterates over the pages of a document in three different ways and prints
    each page. Only checks that no exception occurs.

    NOTE(review): the name presumably refers to issue #533 - confirm.
    '''
    if not hasattr(pymupdf, 'mupdf'):
        print('test_533(): Not running on classic.')
        return
    path = os.path.abspath(f'{__file__}/../../tests/resources/2.pdf')
    doc = pymupdf.open(path)
    print()
    # 1. Iterate over the document itself.
    for p in doc:
        print(f'test_533(): for p in doc: {p=}.')
    # 2. Iterate over a (sliced) list of the document's pages.
    for p in list(doc)[:]:
        print(f'test_533(): for p in list(doc)[:]: {p=}.')
    # 3. Iterate over a slice of the document.
    for p in doc[:]:
        print(f'test_533(): for p in doc[:]: {p=}.')
| 1592 | |
def test_3354():
    """Assigning to Document.metadata must be reflected when reading it back."""
    expected = {'foo': 'bar'}
    document = pymupdf.open(filename)
    document.metadata = expected
    assert document.metadata == expected
| 1598 | |
def test_scientific_numbers():
    '''
    This is #3381.

    Inserts text at a point with very small coordinates and checks that the
    page's contents do not contain b" 1e-", i.e. a number in scientific
    notation.
    '''
    doc = pymupdf.open()
    page = doc.new_page(width=595, height=842)
    page.insert_text(pymupdf.Point(1e-11, -1e-10), "Test")
    contents = page.read_contents()
    print(f'{contents=}')
    assert b" 1e-" not in contents
| 1610 | |
def test_3615():
    """Read pagemode and pagelayout of an EPUB; MuPDF warnings are expected."""
    print('')
    print(f'{pymupdf.pymupdf_version=}', flush=1)
    print(f'{pymupdf.VersionBind=}', flush=1)
    epub_path = os.path.normpath(f'{__file__}/../../tests/resources/test_3615.epub')
    doc = pymupdf.open(epub_path)
    print(doc.pagemode)
    print(doc.pagelayout)
    # The accumulated warnings text must not be empty.
    assert pymupdf.TOOLS.mupdf_warnings()
| 1621 | |
def test_3654():
    """Extract the text of all pages of a DOCX file; must not raise."""
    docx_path = os.path.normpath(f'{__file__}/../../tests/resources/test_3654.docx')
    with pymupdf.open(docx_path) as document:
        # Each page's text is followed by an empty line.
        content = ''.join(page.get_text() + '\n\n' for page in document)
    content = content.strip()
| 1629 | |
def test_3727():
    """Render every page of the test file at a 2x zoom; must not raise."""
    pdf_path = os.path.normpath(f'{__file__}/../../tests/resources/test_3727.pdf')
    doc = pymupdf.open(pdf_path)
    zoom = pymupdf.Matrix(2, 2)
    for page in doc:
        page.get_pixmap(matrix=zoom)
| 1635 | |
def test_3569():
    """Check the SVG generated for page 0 of test_3569.pdf, and the MuPDF warnings.

    The expected SVG is identical for all MuPDF versions except for one
    attribute: before MuPDF 1.27 the second path of the 'Level 03|S-COLS'
    layer carries an explicit `stroke-width="0"`. The expectation is therefore
    written once, with that attribute inserted conditionally, instead of being
    duplicated in full for each version.
    """
    path = os.path.normpath(f'{__file__}/../../tests/resources/test_3569.pdf')
    document = pymupdf.open(path)
    page = document[0]
    svg = page.get_svg_image(text_as_path=False)
    print(f'{svg=}')

    # The only version-dependent fragment of the expected output.
    if pymupdf.mupdf_version_tuple >= (1, 27):
        zero_stroke_width = ''
    else:
        zero_stroke_width = ' stroke-width="0"'

    expected = (
            '<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape" version="1.1" width="3024" height="2160" viewBox="0 0 3024 2160">\n'
            '<defs>\n'
            '<clipPath id="clip_1">\n'
            '<path transform="matrix(0,-.06,-.06,-0,3024,2160)" d="M25432 10909H29692V15642H25432V10909"/>\n'
            '</clipPath>\n'
            '<clipPath id="clip_2">\n'
            '<path transform="matrix(0,-.06,-.06,-0,3024,2160)" d="M28526 38017 31807 40376V40379L31312 41314V42889H28202L25092 42888V42887L28524 38017H28526"/>\n'
            '</clipPath>\n'
            '</defs>\n'
            '<g clip-path="url(#clip_1)">\n'
            '<g inkscape:groupmode="layer" inkscape:label="CED - Text">\n'
            '<text xml:space="preserve" transform="matrix(.06 0 0 .06 3024 2160)" font-size="174.644" font-family="ArialMT"><tspan y="-28538" x="-14909 -14841.063 -14773.127 -14676.024 -14578.922 -14520.766 -14423.663">**L1-13</tspan></text>\n'
            '</g>\n'
            '</g>\n'
            '<g clip-path="url(#clip_2)">\n'
            '<g inkscape:groupmode="layer" inkscape:label="Level 03|S-COLS">\n'
            '<path transform="matrix(0,-.06,-.06,-0,3024,2160)" d="M31130 41483V42083L30530 41483ZM31130 42083 30530 41483V42083Z" fill="#7f7f7f"/>\n'
            f'<path transform="matrix(0,-.06,-.06,-0,3024,2160)"{zero_stroke_width} stroke-linecap="butt" stroke-miterlimit="10" stroke-linejoin="miter" fill="none" stroke="#7f7f7f" d="M31130 41483V42083L30530 41483ZM31130 42083 30530 41483V42083Z"/>\n'
            '<path transform="matrix(0,-.06,-.06,-0,3024,2160)" stroke-width="9" stroke-linecap="round" stroke-linejoin="round" fill="none" stroke="#7f7f7f" d="M30530 41483H31130V42083H30530V41483"/>\n'
            '</g>\n'
            '</g>\n'
            '</svg>\n'
            )
    assert svg == expected

    wt = pymupdf.TOOLS.mupdf_warnings()
    assert wt == 'unknown cid collection: PDFAUTOCAD-Indentity0\nnon-embedded font using identity encoding: ArialMT (mapping via )\ninvalid marked content and clip nesting'
| 1694 | |
def test_3450():
    """Time the rendering of page 0 of test_3450.pdf at 150 dpi.

    The underlying issue is a slow-down. The elapsed time is only printed,
    never asserted, because a time limit would give spurious failures that
    depend on the hardware.

    On a mac-mini, PyMuPDF-1.24.8 takes 60s, PyMuPDF-1.24.9 takes 4s.
    """
    running_on_valgrind = os.environ.get('PYMUPDF_RUNNING_ON_VALGRIND') == '1'
    if running_on_valgrind:
        print('test_3450(): not running on valgrind because very slow.', flush=1)
        return
    pdf_path = os.path.normpath(f'{__file__}/../../tests/resources/test_3450.pdf')
    pdf = pymupdf.open(pdf_path)
    first_page = pdf[0]
    started = time.time()
    pix = first_page.get_pixmap(alpha=False, dpi=150)
    t = time.time() - started
    print(f'test_3450(): {t=}')
| 1712 | |
def test_3859():
    """Check that PDF_NULL, PDF_TRUE and PDF_FALSE are pymupdf.mupdf.PdfObj instances."""
    print(f'{pymupdf.mupdf.PDF_NULL=}.')
    print(f'{pymupdf.mupdf.PDF_TRUE=}.')
    print(f'{pymupdf.mupdf.PDF_FALSE=}.')
    for name in ('NULL', 'TRUE', 'FALSE'):
        name2 = f'PDF_{name}'
        v = getattr(pymupdf.mupdf, name2)
        print(f'{name=} {name2=} {v=} {type(v)=}')
        # isinstance() is the idiomatic type check; the message names the
        # offending constant and its actual type so a failure is diagnosable.
        assert isinstance(v, pymupdf.mupdf.PdfObj), (
                f'pymupdf.mupdf.{name2} is not a pymupdf.mupdf.PdfObj: {type(v)=}.'
                )
| 1722 | |
def test_3905():
    """Opening CSV bytes as a PDF must raise FileDataError with the expected warnings."""
    data = b'A,B,C,D\r\n1,2,1,2\r\n2,2,1,2\r\n'
    raised = False
    try:
        pymupdf.open(stream=data, filetype='pdf')
    except pymupdf.FileDataError as e:
        raised = True
        print(f'test_3905(): e: {e}')
    # Opening garbage without an exception is a failure.
    assert raised
    # The first warning line was reworded in MuPDF 1.26.
    if pymupdf.mupdf_version_tuple >= (1, 26):
        expected = 'format error: cannot find version marker\ntrying to repair broken xref\nrepairing PDF document'
    else:
        expected = 'format error: cannot recognize version marker\ntrying to repair broken xref\nrepairing PDF document'
    wt = pymupdf.TOOLS.mupdf_warnings()
    assert wt == expected
| 1736 | |
def test_3624():
    """Render page 0 of test_3624.pdf at 2x zoom and compare with the reference PNG.

    An RMS difference of up to 1 is tolerated. Above that, a difference image
    is saved next to the tests and the test fails.
    """
    pdf_path = os.path.normpath(f'{__file__}/../../tests/resources/test_3624.pdf')
    path_png_expected = os.path.normpath(f'{__file__}/../../tests/resources/test_3624_expected.png')
    path_png = os.path.normpath(f'{__file__}/../../tests/test_3624.png')
    with pymupdf.open(pdf_path) as document:
        rendered = document[0].get_pixmap(matrix=pymupdf.Matrix(2, 2))
        print(f'Saving to {path_png=}.')
        rendered.save(path_png)
    rms = gentle_compare.pixmaps_rms(path_png_expected, path_png)
    print(f'{rms=}')
    # We get small differences in sysinstall tests, where some thirdparty
    # libraries can differ.
    if rms <= 1:
        return
    difference = gentle_compare.pixmaps_diff(path_png_expected, path_png)
    difference.save(os.path.normpath(f'{__file__}/../../tests/test_3624_diff.png'))
    assert 0, f'{rms=}'
| 1755 | |
| 1756 | |
def test_4043():
    """Call fullcopy_page(1) on test_4043.pdf; passes if nothing raises."""
    pdf_path = os.path.normpath(f'{__file__}/../../tests/resources/test_4043.pdf')
    pdf = pymupdf.open(pdf_path)
    pdf.fullcopy_page(1)
| 1761 | |
| 1762 | |
def test_4018():
    """Iterate pages(-1, -1) of a new empty document; passes if nothing raises."""
    empty_document = pymupdf.open()
    for _page in empty_document.pages(-1, -1):
        continue
| 1767 | |
def test_4034():
    """Compare page 0 renderings before and after saving with clean=1."""
    # tests/resources/test_4034.pdf is first two pages of input file in
    # https://github.com/pymupdf/PyMuPDF/issues/4034.
    path = os.path.normpath(f'{__file__}/../../tests/resources/test_4034.pdf')
    path_clean = os.path.normpath(f'{__file__}/../../tests/test_4034_out.pdf')
    with pymupdf.open(path) as original:
        before = original[0].get_pixmap()
        original.save(path_clean, clean=1)
    with pymupdf.open(path_clean) as cleaned:
        page = cleaned[0]
        after = cleaned[0].get_pixmap()
    rms = gentle_compare.pixmaps_rms(before, after)
    print(f'test_4034(): Comparison of original/cleaned page 0 pixmaps: {rms=}.')
    if pymupdf.mupdf_version_tuple >= (1, 25, 2):
        # Cleaning must not change the rendering at all.
        assert rms == 0
    else:
        # Older MuPDF is expected to show a visible difference.
        assert 30 < rms < 50
| 1785 | |
def test_4309():
    """Create a one-page document and delete the page with delete_page()'s defaults."""
    pdf = pymupdf.open()
    # The Page object stays referenced while the page is deleted.
    new_page = pdf.new_page()
    pdf.delete_page()
| 1790 | |
def test_4263():
    """Run `pymupdf clean -linear` on test_4263.pdf and check the exit status.

    With MuPDF before 1.26 the command must succeed; from 1.26 on it must
    fail.

    The command is passed as an argument list without a shell, so paths that
    contain spaces or shell metacharacters are handed over unchanged.
    """
    path = os.path.normpath(f'{__file__}/../../tests/resources/test_4263.pdf')
    path_out = f'{path}.linerarized.pdf'
    command = ['pymupdf', 'clean', '-linear', path, path_out]
    print(f'Running: {command}')
    cp = subprocess.run(command, check=False)
    if pymupdf.mupdf_version_tuple < (1, 26):
        assert cp.returncode == 0
    else:
        # Support for linearisation dropped in MuPDF-1.26.
        assert cp.returncode
| 1802 | |
def test_4224():
    """Render every page of test_4224.pdf at 150 dpi and save each as a PNG."""
    path = os.path.normpath(f'{__file__}/../../tests/resources/test_4224.pdf')
    with pymupdf.open(path) as document:
        for page in document.pages():
            path_pixmap = f'{path}.{page.number}.png'
            page.get_pixmap(dpi=150).save(path_pixmap)
            print(f'Have created: {path_pixmap}')
    # Warnings are only read and checked for MuPDF before 1.25.5.
    if pymupdf.mupdf_version_tuple < (1, 25, 5):
        assert pymupdf.TOOLS.mupdf_warnings() == 'format error: negative code in 1d faxd\npadding truncated image'
| 1814 | |
def test_4319():
    """Write a PDF, reopen it, close it, then delete the file."""
    # Have not seen this test reproduce issue #4319, but keeping it anyway.
    path = os.path.normpath(f'{__file__}/../../tests/resources/test_4319.pdf')

    # Step 1: create and save a one-page document.
    doc = pymupdf.open()
    page = doc.new_page()
    page.insert_text((10, 100), "some text")
    doc.save(path)
    doc.close()

    # Step 2: reopen it, touch a page and the page count, close again.
    doc = pymupdf.open(path)
    page, pc = doc[0], doc.page_count
    doc.close()

    # Step 3: delete the file after it has been closed.
    os.remove(path)
    print(f"removed {doc.name=}")
| 1829 | |
def test_3886():
    """Print the RMS differences between page 0 of test_3886.pdf and its saved copies.

    The copies are saved with clean=0 and clean=1. The differences are only
    printed, never asserted.
    """
    path = os.path.normpath(f'{__file__}/../../tests/resources/test_3886.pdf')
    path_clean0 = os.path.normpath(f'{__file__}/../../tests/resources/test_3886_clean0.pdf')
    path_clean1 = os.path.normpath(f'{__file__}/../../tests/resources/test_3886_clean1.pdf')

    def first_page_pixmap(pdf_path):
        # Render page 0 of the PDF at `pdf_path`.
        with pymupdf.open(pdf_path) as pdf:
            return pdf[0].get_pixmap()

    with pymupdf.open(path) as document:
        pixmap = document[0].get_pixmap()
        document.save(path_clean0, clean=0)

    with pymupdf.open(path) as document:
        document.save(path_clean1, clean=1)

    pixmap_clean0 = first_page_pixmap(path_clean0)
    pixmap_clean1 = first_page_pixmap(path_clean1)

    rms_0 = gentle_compare.pixmaps_rms(pixmap, pixmap_clean0)
    rms_1 = gentle_compare.pixmaps_rms(pixmap, pixmap_clean1)
    print(f'test_3886(): {rms_0=} {rms_1=}')
| 1851 | |
def test_4415():
    """Insert text on page 0 of test_4415.pdf and compare the rendering with the reference PNG."""
    path = os.path.normpath(f'{__file__}/../../tests/resources/test_4415.pdf')
    path_out = os.path.normpath(f'{__file__}/../../tests/resources/test_4415_out.png')
    path_out_expected = os.path.normpath(f'{__file__}/../../tests/resources/test_4415_out_expected.png')
    with pymupdf.open(path) as document:
        page = document[0]
        # The apparent insertion point is multiplied by the matrix annihilating
        # the page rotation; the text is rotated by the page rotation.
        insertion_point = pymupdf.Point(100, 100) * page.derotation_matrix
        page.insert_text(
                insertion_point,
                'Text at Top-Left',
                fontsize=60,
                rotate=page.rotation,
                )
        page.get_pixmap().save(path_out)
        rms = gentle_compare.pixmaps_rms(path_out_expected, path_out)
        assert rms == 0, f'{rms=}'
| 1867 | |
def test_4466():
    """Read is_unicolor on a clipped 10x10 pixmap of every page of test_4466.pdf."""
    pdf_path = os.path.normpath(f'{__file__}/../../tests/test_4466.pdf')
    clip_rect = (0, 0, 10, 10)
    with pymupdf.Document(pdf_path) as document:
        for page in document:
            print(f'{page=}', flush=1)
            pixmap = page.get_pixmap(clip=clip_rect)
            print(f'{pixmap.n=} {pixmap.size=} {pixmap.stride=} {pixmap.width=} {pixmap.height=} {pixmap.x=} {pixmap.y=}', flush=1)
            # Only the property access matters; the value is ignored.
            _ = pixmap.is_unicolor  # Used to crash.
| 1876 | |
| 1877 | |
def test_4479():
    """Check layer_ui_configs() before and after switching layer 0 off."""
    # This passes with pymupdf-1.24.14, fails with pymupdf==1.25.*, passes with
    # pymupdf-1.26.0.
    print()
    path = os.path.normpath(f'{__file__}/../../tests/resources/test_4479.pdf')

    def show(items):
        for item in items:
            print(f' {repr(item)}')

    def expected_configs(off_numbers):
        # Eight checkbox layers; 'on' is 0 for the layers in `off_numbers`, else 1.
        return [
                {
                    'depth': 0,
                    'locked': 0,
                    'number': number,
                    'on': 0 if number in off_numbers else 1,
                    'text': f'layer_{number}',
                    'type': 'checkbox',
                }
                for number in range(8)
                ]

    with pymupdf.open(path) as document:
        # Initially only layer 2 is off.
        items = document.layer_ui_configs()
        show(items)
        assert items == expected_configs({2})

        # After switching layer 0 off, layers 0 and 2 are off.
        document.set_layer_ui_config(0, pymupdf.PDF_OC_OFF)
        items = document.layer_ui_configs()
        show(items)
        assert items == expected_configs({0, 2})
| 1915 | |
| 1916 | |
def test_4533():
    """Open a large downloaded PDF in a child process and check the exit status.

    The child must exit with 0 for MuPDF >= 1.26.6 and with a non-zero status
    for older versions.

    The child is started from an argument list without a shell, so neither
    the interpreter path nor the PDF path needs shell quoting.
    """
    print()
    path = util.download(
            'https://github.com/user-attachments/files/20497146/NineData_user_manual_V3.0.5.pdf',
            'test_4533.pdf',
            size=16864501,
            )
    # This bug is a segv so we run the test in a child process.
    code = f'import pymupdf; document = pymupdf.open({path!r}); print(len(document))'
    command = [sys.executable, '-c', code]
    print(f'Running: {command}')
    cp = subprocess.run(command, check=False)
    e = cp.returncode
    print(f'{e=}')
    if pymupdf.mupdf_version_tuple >= (1, 26, 6):
        assert e == 0
    else:
        assert e != 0
| 1934 | |
| 1935 | |
def test_4564():
    """Print the metadata of test_4564.pdf and check the 'producer' value."""
    path = os.path.normpath(f'{__file__}/../../tests/resources/test_4564.pdf')
    print()
    with pymupdf.open(path) as document:
        for key in sorted(document.metadata.keys()):
            print(f'{key}: {document.metadata[key]!r}')
        # The trailing characters of the value depend on the MuPDF version.
        if pymupdf.mupdf_version_tuple >= (1, 27):
            expected_producer = 'Adobe PSL 1.3e for Canon\x00'
        else:
            expected_producer = 'Adobe PSL 1.3e for Canon\udcc0\udc80'
        assert document.metadata['producer'] == expected_producer
| 1947 | |
| 1948 | |
def test_4496():
    """Open test_4496.hwpx and print its page count; passes if nothing raises."""
    hwpx_path = os.path.normpath(f'{__file__}/../../tests/resources/test_4496.hwpx')
    with pymupdf.open(hwpx_path) as hwpx:
        print(hwpx.page_count)
| 1953 | |
| 1954 | |
def test_gitinfo():
    """Print PyMuPDF/MuPDF location, version and git information.

    This doesn't really test very much, but can be useful to see the current
    values.
    """
    print('')
    # The label used to read 'test_4496():', copied from the previous test.
    print('test_gitinfo():')
    print(f'{pymupdf.mupdf_location=}')
    print(f'{pymupdf.mupdf_version=}')
    print(f'{pymupdf.pymupdf_git_branch=}')
    print(f'{pymupdf.pymupdf_git_sha=}')
    print(f'{pymupdf.pymupdf_version=}')
    print(f'pymupdf.pymupdf_git_diff:\n{textwrap.indent(pymupdf.pymupdf_git_diff, " ")}')
| 1966 | |
| 1967 | |
def test_4392():
    """Check the exit codes of importing pymupdf under pytest and under -Werror.

    A file containing only `import pymupdf` is written and run with
    `pytest`, with `pytest -Werror`, and the import is run directly with
    `python -Werror`. The expected exit codes depend on the SWIG version and
    the platform.
    """
    print()
    path = os.path.normpath(f'{__file__}/../../tests/test_4392.py')
    with open(path, 'w') as f:
        f.write('import pymupdf\n')

    def run(command):
        # Run `command` through the shell and return its exit code.
        print(f'Running: {command}', flush=1)
        return subprocess.run(command, shell=1, check=0).returncode

    e1 = run(f'pytest {path}')
    print(f'{e1=}')

    e2 = run(f'pytest -Werror {path}')
    print(f'{e2=}')

    e3 = run(f'{sys.executable} -Werror -c "import pymupdf"')
    print(f'{e3=}')

    print(f'{e1=} {e2=} {e3=}')

    print(f'{pymupdf.swig_version=}')
    print(f'{pymupdf.swig_version_tuple=}')

    assert e1 == 5
    if pymupdf.swig_version_tuple >= (4, 4):
        assert e2 == 5
        assert e3 == 0
    elif platform.system() == 'Windows':
        # We get SEGV's etc with older swig.
        assert (e2, e3) == (0xc0000005, 0xc0000005)
    else:
        # We get SEGV's etc with older swig.
        # On plain linux we get (139, 139). On manylinux we get (-11,
        # -11). On MacOS we get (-11, -11).
        assert (e2, e3) in ((139, 139), (-11, -11))
| 2006 | |
| 2007 | |
def test_4639():
    """Call get_bboxlog(layers=True) on the last page of test_4639.pdf."""
    pdf_path = os.path.normpath(f'{__file__}/../../tests/resources/test_4639.pdf')
    with pymupdf.open(pdf_path) as pdf:
        last_page = pdf[-1]
        last_page.get_bboxlog(layers=True)
| 2013 | |
| 2014 | |
def test_4590():
    """Create a PDF with a file attachment annotation, then call Document.scrub() on it."""
    path = os.path.normpath(f'{__file__}/../../tests/test_4590.pdf')
    path_sample = os.path.normpath(f'{__file__}/../../tests/test_4590_annotation_sample.txt')

    # Create a sample file, then read it back as bytes.
    sample_file = pathlib.Path(path_sample)
    sample_file.write_text('This is a sample attachment file.')
    sample = sample_file.read_bytes()

    # Create test PDF.
    with pymupdf.open() as document:
        page = document.new_page()

        # Add some text
        page.insert_text(
                (72, 72),
                'This PDF contains a file attachment annotation.',
                fontsize=12,
                )

        # Add the file attachment annotation; its position is the PushPin
        # icon rectangle.
        page.add_file_annot(
                point=pymupdf.Rect(72, 100, 92, 120),
                buffer_=sample,
                filename='sample.txt',
                ufilename='sample.txt',
                desc='A test attachment file.',
                icon='PushPin',
                )

        # Save the PDF
        document.save(path)

    # Check pymupdf.Document.scrub() works.
    with pymupdf.open(path) as saved:
        saved.scrub()
