Mercurial > hgrepos > Python2 > PyMuPDF
comparison tests/test_mupdf_regressions.py @ 1:1d09e1dec1d9 upstream
ADD: PyMuPDF v1.26.4: the original sdist.
It does not yet contain MuPDF; MuPDF is normally downloaded when
building PyMuPDF.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:37:51 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 1:1d09e1dec1d9 |
|---|---|
| 1 import pymupdf | |
| 2 import os | |
| 3 import gentle_compare | |
| 4 | |
| 5 scriptdir = os.path.abspath(os.path.dirname(__file__)) | |
| 6 | |
| 7 | |
def test_707448():
    """Check that sanitizing page contents keeps the page's words intact.

    The words of the first page of ``test-707448.pdf`` are extracted before
    and after ``clean_contents(sanitize=True)``; ``gentle_compare`` must
    accept the two word lists as matching.
    """
    pdf_path = os.path.join(scriptdir, "resources", "test-707448.pdf")
    document = pymupdf.open(pdf_path)
    first_page = document[0]

    words_before = first_page.get_text("words")
    first_page.clean_contents(sanitize=True)
    words_after = first_page.get_text("words")

    assert gentle_compare.gentle_compare(words_before, words_after)
| 17 | |
| 18 | |
def test_707673():
    """Check that sanitizing page contents keeps the page's words intact.

    Regression history:
        * fails starting with MuPDF v1.23.9
        * fixed in commit 779b8234529cb82aa1e92826854c7bb98b19e44b
          (golden/master)

    The words of the first page of ``test-707673.pdf`` are extracted before
    and after ``clean_contents(sanitize=True)`` and compared with
    ``gentle_compare``.
    """
    pdf_path = os.path.join(scriptdir, "resources", "test-707673.pdf")
    document = pymupdf.open(pdf_path)
    first_page = document[0]

    words_before = first_page.get_text("words")
    first_page.clean_contents(sanitize=True)
    words_after = first_page.get_text("words")

    assert gentle_compare.gentle_compare(words_before, words_after)
| 35 | |
| 36 | |
def test_707727():
    """Check that sanitizing page contents keeps the rendered page unchanged.

    MuPDF issue: https://bugs.ghostscript.com/show_bug.cgi?id=707727

    The first page of ``test_3362.pdf`` is rendered before and after
    ``clean_contents(sanitize=True)``. Both pixmaps are saved as PNG files
    for inspection, and their RMS difference must be exactly 0 for MuPDF
    older than 1.25.2, or below 0.05 from 1.25.2 on.
    """
    pdf_path = os.path.join(scriptdir, "resources", "test_3362.pdf")
    document = pymupdf.open(pdf_path)
    page = document[0]

    pixmap_before = page.get_pixmap()
    page.clean_contents(sanitize=True)
    # Reload the page; the original test notes this is required to prevent
    # re-use.
    page = document.reload_page(page)
    pixmap_after = page.get_pixmap()

    rms = gentle_compare.pixmaps_rms(pixmap_before, pixmap_after)
    print(f'{rms=}', flush=True)

    # Keep both renderings on disk so a failure can be inspected visually.
    png_before = os.path.normpath(f'{__file__}/../../tests/test_707727_pix0.png')
    pixmap_before.save(png_before)
    png_after = os.path.normpath(f'{__file__}/../../tests/test_707727_pix1.png')
    pixmap_after.save(png_after)

    if pymupdf.mupdf_version_tuple < (1, 25, 2):
        assert rms == 0
    else:
        # New sanitising gives small fp rounding errors.
        assert rms < 0.05
| 58 | |
| 59 | |
def test_707721():
    """Check that text extraction works for nested MCID with Type 3 fonts.

    PyMuPDF issue: https://github.com/pymupdf/PyMuPDF/issues/3357
    MuPDF issue: https://bugs.ghostscript.com/show_bug.cgi?id=707721

    Passes when plain text extraction of the first page of
    ``test_3357.pdf`` returns a non-empty result.
    """
    pdf_path = os.path.join(scriptdir, "resources", "test_3357.pdf")
    document = pymupdf.open(pdf_path)
    first_page = document[0]

    extracted_text = first_page.get_text()
    assert extracted_text
| 70 | |
| 71 | |
def test_3376():
    """Check the fix of MuPDF bug 707733.

    MuPDF issue: https://bugs.ghostscript.com/show_bug.cgi?id=707733
    PyMuPDF issue: https://github.com/pymupdf/PyMuPDF/issues/3376

    The test file contains a redaction for the first 3 words,
    "Table of Contents".

    Procedure:
        1. Extract all words of the first page, sorted.
        2. Apply the redactions.
        3. Extract the sorted words again.
        4. Confirm that the words now extracted match the original list
           without its first 3 entries.
    """
    pdf_path = os.path.join(scriptdir, "resources", "test_3376.pdf")
    document = pymupdf.open(pdf_path)
    first_page = document[0]

    words_before = first_page.get_text("words", sort=True)
    redacted_words = words_before[:3]  # the 3 words covered by the redaction
    expected_words = words_before[3:]  # everything that must survive

    # Item 4 of each word entry is the word's text.
    redacted_text = " ".join(word[4] for word in redacted_words)
    assert redacted_text == "Table of Contents"

    first_page.apply_redactions()

    words_after = first_page.get_text("words", sort=True)

    assert gentle_compare.gentle_compare(expected_words, words_after)
