Mercurial > hgrepos > Python2 > PyMuPDF
diff tests/test_mupdf_regressions.py @ 1:1d09e1dec1d9 upstream
ADD: PyMuPDF v1.26.4: the original sdist.
It does not yet contain MuPDF, which is normally downloaded when
building PyMuPDF.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:37:51 +0200 |
| parents | |
| children | |
line wrap: on
line diff
"""Regression tests tied to individual MuPDF bug reports.

File: tests/test_mupdf_regressions.py

Each test opens a PDF from the ``resources`` directory next to this file,
performs the operation named in the corresponding bug report (content
cleaning, text extraction or redaction) and checks that the page still
yields the expected words or pixels.
"""

import os

import pymupdf
import gentle_compare

# Absolute directory of this test module; test PDFs live in "resources" below it.
scriptdir = os.path.abspath(os.path.dirname(__file__))


def _resource_path(name):
    """Return the path of file *name* inside the ``resources`` directory."""
    return os.path.join(scriptdir, "resources", name)


def _words_around_cleaning(page):
    """Return the words of *page* extracted before and after sanitizing it.

    The page contents are cleaned (with ``sanitize=True``) between the two
    extractions, so the page is modified as a side effect.
    """
    before = page.get_text("words")
    page.clean_contents(sanitize=True)
    after = page.get_text("words")
    return before, after


def test_707448():
    """Check that cleaning page contents keeps the page's words intact."""
    doc = pymupdf.open(_resource_path("test-707448.pdf"))
    before, after = _words_around_cleaning(doc[0])
    assert gentle_compare.gentle_compare(before, after)


def test_707673():
    """Check that cleaning page contents keeps the page's words intact.

    Fails starting with MuPDF v1.23.9.

    Fixed in:
    commit 779b8234529cb82aa1e92826854c7bb98b19e44b (golden/master)
    """
    doc = pymupdf.open(_resource_path("test-707673.pdf"))
    before, after = _words_around_cleaning(doc[0])
    assert gentle_compare.gentle_compare(before, after)


def test_707727():
    """Check that cleaning page contents keeps the rendered page intact.

    The page is rendered before and after cleaning and the two pixmaps are
    compared via their RMS difference. Both pixmaps are also saved as PNG
    files for inspection.

    MuPDF issue: https://bugs.ghostscript.com/show_bug.cgi?id=707727
    """
    doc = pymupdf.open(_resource_path("test_3362.pdf"))
    page = doc[0]
    pix0 = page.get_pixmap()
    page.clean_contents(sanitize=True)
    # Reloading the page is required to prevent re-use.
    page = doc.reload_page(page)
    pix1 = page.get_pixmap()

    rms = gentle_compare.pixmaps_rms(pix0, pix1)
    print(f'{rms=}', flush=True)

    renderings = (
        (pix0, f'{__file__}/../../tests/test_707727_pix0.png'),
        (pix1, f'{__file__}/../../tests/test_707727_pix1.png'),
    )
    for pixmap, target in renderings:
        pixmap.save(os.path.normpath(target))

    if pymupdf.mupdf_version_tuple < (1, 25, 2):
        assert rms == 0
    else:
        # New sanitising gives small fp rounding errors.
        assert rms < 0.05


def test_707721():
    """Check that text extraction works for nested MCID with Type 3 fonts.

    PyMuPDF issue https://github.com/pymupdf/PyMuPDF/issues/3357
    MuPDF issue: https://bugs.ghostscript.com/show_bug.cgi?id=707721
    """
    doc = pymupdf.open(_resource_path("test_3357.pdf"))
    extracted = doc[0].get_text()
    # Any non-empty extraction result counts as success.
    assert extracted


def test_3376():
    """Check the fix of MuPDF bug 707733.

    https://bugs.ghostscript.com/show_bug.cgi?id=707733
    PyMuPDF issue https://github.com/pymupdf/PyMuPDF/issues/3376

    The test file contains a redaction for its first 3 words,
    "Table of Contents". Strategy:

    - extract all words (sorted)
    - apply the redactions
    - extract the words again
    - confirm that the words after redaction match the original words
      without the first three.
    """
    doc = pymupdf.open(_resource_path("test_3376.pdf"))
    page = doc[0]

    all_words = page.get_text("words", sort=True)
    leading, remaining = all_words[:3], all_words[3:]
    # Item 4 of each word entry is joined to rebuild the redacted phrase.
    assert " ".join(entry[4] for entry in leading) == "Table of Contents"

    page.apply_redactions()

    words_after = page.get_text("words", sort=True)
    assert gentle_compare.gentle_compare(remaining, words_after)
