comparison tests/test_4503.py @ 1:1d09e1dec1d9 upstream

ADD: PyMuPDF v1.26.4: the original sdist. It does not yet contain MuPDF. This normally will be downloaded when building PyMuPDF.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:37:51 +0200
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 1:1d09e1dec1d9
1 """
2 Test for issue #4503 in pymupdf:
3 Correct recognition of strikeout and underline styles in text spans.
4 """
5
6 import os
7 import pymupdf
8 from pymupdf import mupdf
9
# MuPDF structured-text flag constants; test_4503 masks a span's "char_flags"
# value with them to check which styles were detected.
UNDERLINE = mupdf.FZ_STEXT_UNDERLINE
STRIKEOUT = mupdf.FZ_STEXT_STRIKEOUT
12
13
def test_4503():
    """
    Check that the text span with the specified text has the correct styling:
    strikeout, but no underline.

    Previously, the text was broken into multiple spans with span breaks at
    every space, and some parts were not detected as strikeout at all.
    """
    scriptdir = os.path.dirname(os.path.abspath(__file__))
    text = "the right to request the state to review and, if appropriate,"
    filename = os.path.join(scriptdir, "resources", "test-4503.pdf")
    flags = pymupdf.TEXT_ACCURATE_BBOXES | pymupdf.TEXT_COLLECT_STYLES

    # Open the document as a context manager so it is closed as soon as the
    # text has been extracted, instead of being left open for the rest of
    # the test run.
    with pymupdf.open(filename) as doc:
        page = doc[0]
        blocks = page.get_text("dict", flags=flags)["blocks"]

    # Collect every span whose text is exactly the expected sentence; the
    # regression being guarded against split this sentence across spans.
    spans = [
        span
        for block in blocks
        for line in block["lines"]
        for span in line["spans"]
        if span["text"] == text
    ]
    assert spans, "No spans found with the specified text"
    char_flags = spans[0]["char_flags"]

    assert char_flags & STRIKEOUT, (
        f"Expected strikeout flag to be set, got char_flags={char_flags!r}"
    )
    assert not (char_flags & UNDERLINE), (
        f"Expected underline flag to be clear, got char_flags={char_flags!r}"
    )