view tests/test_4503.py @ 46:7ee69f120f19 default tip

>>>>> tag v1.26.5+1 for changeset b74429b0f5c4
author Franz Glasner <fzglas.hg@dom66.de>
date Sat, 11 Oct 2025 17:17:30 +0200
parents 1d09e1dec1d9
children
line wrap: on
line source

"""
Test for issue #4503 in pymupdf:
Correct recognition of strikeout and underline styles in text spans.
"""

import os
import pymupdf
from pymupdf import mupdf

# Short aliases for the MuPDF flag constants; the test below uses them as
# bit masks against a span's "char_flags" value.
STRIKEOUT = mupdf.FZ_STEXT_STRIKEOUT
UNDERLINE = mupdf.FZ_STEXT_UNDERLINE


def test_4503():
    """
    Check strikeout/underline detection for issue #4503.

    The expected sentence must be found as the text of a single span, and
    that span must carry the strikeout flag but not the underline flag.
    Previously, the text was broken into multiple spans with span breaks at
    every space, and some parts were not detected as strikeout at all.
    """
    scriptdir = os.path.dirname(os.path.abspath(__file__))
    text = "the right to request the state to review and, if appropriate,"
    filename = os.path.join(scriptdir, "resources", "test-4503.pdf")
    flags = pymupdf.TEXT_ACCURATE_BBOXES | pymupdf.TEXT_COLLECT_STYLES

    # Open the document as a context manager so it is closed even if text
    # extraction raises.
    with pymupdf.open(filename) as doc:
        page = doc[0]
        # Flatten blocks -> lines -> spans, keeping only spans whose text is
        # exactly the expected sentence.
        spans = [
            span
            for block in page.get_text("dict", flags=flags)["blocks"]
            for line in block["lines"]
            for span in line["spans"]
            if span["text"] == text
        ]

    assert spans, "No spans found with the specified text"
    char_flags = spans[0]["char_flags"]

    # `&` binds tighter than `not`, so the second check is
    # `not (char_flags & UNDERLINE)`.
    assert char_flags & STRIKEOUT, (
        f"Span is not flagged as strikeout (char_flags={char_flags!r})"
    )
    assert not char_flags & UNDERLINE, (
        f"Span is unexpectedly flagged as underline (char_flags={char_flags!r})"
    )