Mercurial > hgrepos > Python2 > PyMuPDF
view tests/test_mupdf_regressions.py @ 44:0a8b06e38e19
Need "packaging" at wheel build time too.
Parsing of version_p into a tuple now is done at build time.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Sat, 11 Oct 2025 17:16:23 +0200 |
| parents | 1d09e1dec1d9 |
| children |
line wrap: on
line source
import os

import pymupdf

import gentle_compare

# Directory containing this test script; test PDFs live in its "resources"
# sub-directory.
scriptdir = os.path.abspath(os.path.dirname(__file__))


def _resource(name):
    """Return the path of file *name* in the ``resources`` directory."""
    return os.path.join(scriptdir, "resources", name)


def test_707448():
    """Confirm page content cleaning does not destroy page appearance."""
    # Using the document as a context manager closes it even when the
    # assertion fails.
    with pymupdf.open(_resource("test-707448.pdf")) as doc:
        page = doc[0]
        words0 = page.get_text("words")
        page.clean_contents(sanitize=True)
        words1 = page.get_text("words")
        assert gentle_compare.gentle_compare(words0, words1)


def test_707673():
    """Confirm page content cleaning does not destroy page appearance.

    Fails starting with MuPDF v1.23.9.

    Fixed in:
    commit 779b8234529cb82aa1e92826854c7bb98b19e44b (golden/master)
    """
    with pymupdf.open(_resource("test-707673.pdf")) as doc:
        page = doc[0]
        words0 = page.get_text("words")
        page.clean_contents(sanitize=True)
        words1 = page.get_text("words")
        ok = gentle_compare.gentle_compare(words0, words1)
        assert ok


def test_707727():
    """Confirm page content cleaning does not destroy page appearance.

    MuPDF issue: https://bugs.ghostscript.com/show_bug.cgi?id=707727
    """
    with pymupdf.open(_resource("test_3362.pdf")) as doc:
        page = doc[0]
        pix0 = page.get_pixmap()
        page.clean_contents(sanitize=True)
        page = doc.reload_page(page)  # required to prevent re-use
        pix1 = page.get_pixmap()
        rms = gentle_compare.pixmaps_rms(pix0, pix1)
        print(f'{rms=}', flush=True)
        # Keep both renderings on disk so a failure can be inspected visually.
        pix0.save(os.path.normpath(f'{__file__}/../../tests/test_707727_pix0.png'))
        pix1.save(os.path.normpath(f'{__file__}/../../tests/test_707727_pix1.png'))
        if pymupdf.mupdf_version_tuple >= (1, 25, 2):
            # New sanitising gives small fp rounding errors.
            assert rms < 0.05
        else:
            assert rms == 0


def test_707721():
    """Confirm text extraction works for nested MCID with Type 3 fonts.

    PyMuPDF issue https://github.com/pymupdf/PyMuPDF/issues/3357
    MuPDF issue: https://bugs.ghostscript.com/show_bug.cgi?id=707721
    """
    with pymupdf.open(_resource("test_3357.pdf")) as doc:
        page = doc[0]
        ok = page.get_text()
        # Passes as long as any text at all is extracted.
        assert ok


def test_3376():
    """Check fix of MuPDF bug 707733.

    https://bugs.ghostscript.com/show_bug.cgi?id=707733
    PyMuPDF issue https://github.com/pymupdf/PyMuPDF/issues/3376

    Test file contains a redaction for the first 3 words: "Table of Contents".

    Test strategy:
    - extract all words (sorted)
    - apply redactions
    - extract words again
    - confirm: we now have 3 words less and remaining words are equal.
    """
    with pymupdf.open(_resource("test_3376.pdf")) as doc:
        page = doc[0]
        words0 = page.get_text("words", sort=True)
        words0_s = words0[:3]  # first 3 words
        words0_e = words0[3:]  # remaining words
        # Item 4 of each word entry is the word's text.
        assert " ".join(w[4] for w in words0_s) == "Table of Contents"

        page.apply_redactions()

        words1 = page.get_text("words", sort=True)

        ok = gentle_compare.gentle_compare(words0_e, words1)
        assert ok
