diff tests/test_4503.py @ 1:1d09e1dec1d9 upstream

ADD: PyMuPDF v1.26.4: the original sdist. It does not yet contain MuPDF. This normally will be downloaded when building PyMuPDF.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:37:51 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test_4503.py	Mon Sep 15 11:37:51 2025 +0200
@@ -0,0 +1,38 @@
+"""
+Test for issue #4503 in pymupdf:
+Correct recognition of strikeout and underline styles in text spans.
+"""
+
+import os
+import pymupdf
+from pymupdf import mupdf
+
+STRIKEOUT = mupdf.FZ_STEXT_STRIKEOUT
+UNDERLINE = mupdf.FZ_STEXT_UNDERLINE
+
+
+def test_4503():
+    """
+    Check that the text span with the specified text has the correct styling:
+    strikeout, but no underline.
+    Previously, the text was broken into multiple spans with span breaks at
+    every space, and some parts were not detected as strikeout at all.
+    """
+    scriptdir = os.path.dirname(os.path.abspath(__file__))  # directory of this test file
+    text = "the right to request the state to review and, if appropriate,"
+    filename = os.path.join(scriptdir, "resources", "test-4503.pdf")
+    doc = pymupdf.open(filename)
+    page = doc[0]  # only the first page is inspected
+    flags = pymupdf.TEXT_ACCURATE_BBOXES | pymupdf.TEXT_COLLECT_STYLES  # presumably needed for style bits in char_flags -- confirm
+    spans = [
+        s
+        for b in page.get_text("dict", flags=flags)["blocks"]
+        for l in b["lines"]
+        for s in l["spans"]
+        if s["text"] == text  # exact match: the whole phrase must be one single span
+    ]
+    assert spans, "No spans found with the specified text"
+    span = spans[0]  # only the first matching span is checked
+
+    assert span["char_flags"] & STRIKEOUT  # strikeout bit must be set
+    assert not span["char_flags"] & UNDERLINE  # underline bit must be clear