Mercurial > hgrepos > Python2 > PyMuPDF
view tests/test_4503.py @ 46:7ee69f120f19 default tip
>>>>> tag v1.26.5+1 for changeset b74429b0f5c4
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Sat, 11 Oct 2025 17:17:30 +0200 |
| parents | 1d09e1dec1d9 |
| children |
line wrap: on
line source
""" Test for issue #4503 in pymupdf: Correct recognition of strikeout and underline styles in text spans. """ import os import pymupdf from pymupdf import mupdf STRIKEOUT = mupdf.FZ_STEXT_STRIKEOUT UNDERLINE = mupdf.FZ_STEXT_UNDERLINE def test_4503(): """ Check that the text span with the specified text has the correct styling: strikeout, but no underline. Previously, the text was broken in multiple spans with span breaks at every space. and some parts were not detected as strikeout at all. """ scriptdir = os.path.dirname(os.path.abspath(__file__)) text = "the right to request the state to review and, if appropriate," filename = os.path.join(scriptdir, "resources", "test-4503.pdf") doc = pymupdf.open(filename) page = doc[0] flags = pymupdf.TEXT_ACCURATE_BBOXES | pymupdf.TEXT_COLLECT_STYLES spans = [ s for b in page.get_text("dict", flags=flags)["blocks"] for l in b["lines"] for s in l["spans"] if s["text"] == text ] assert spans, "No spans found with the specified text" span = spans[0] assert span["char_flags"] & STRIKEOUT assert not span["char_flags"] & UNDERLINE
