Mercurial > hgrepos > Python2 > PyMuPDF
annotate tests/test_4503.py @ 46:7ee69f120f19 default tip
>>>>> tag v1.26.5+1 for changeset b74429b0f5c4
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Sat, 11 Oct 2025 17:17:30 +0200 |
| parents | 1d09e1dec1d9 |
| children |
| rev | line source |
|---|---|
|
1
1d09e1dec1d9
ADD: PyMuPDF v1.26.4: the original sdist.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff
changeset
|
1 """ |
|
1d09e1dec1d9
ADD: PyMuPDF v1.26.4: the original sdist.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff
changeset
|
2 Test for issue #4503 in pymupdf: |
|
1d09e1dec1d9
ADD: PyMuPDF v1.26.4: the original sdist.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff
changeset
|
3 Correct recognition of strikeout and underline styles in text spans. |
|
1d09e1dec1d9
ADD: PyMuPDF v1.26.4: the original sdist.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff
changeset
|
4 """ |
|
1d09e1dec1d9
ADD: PyMuPDF v1.26.4: the original sdist.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff
changeset
|
5 |
|
1d09e1dec1d9
ADD: PyMuPDF v1.26.4: the original sdist.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff
changeset
|
6 import os |
|
1d09e1dec1d9
ADD: PyMuPDF v1.26.4: the original sdist.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff
changeset
|
7 import pymupdf |
|
1d09e1dec1d9
ADD: PyMuPDF v1.26.4: the original sdist.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff
changeset
|
8 from pymupdf import mupdf |
|
1d09e1dec1d9
ADD: PyMuPDF v1.26.4: the original sdist.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff
changeset
|
9 |
|
1d09e1dec1d9
ADD: PyMuPDF v1.26.4: the original sdist.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff
changeset
|
10 STRIKEOUT = mupdf.FZ_STEXT_STRIKEOUT |
|
1d09e1dec1d9
ADD: PyMuPDF v1.26.4: the original sdist.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff
changeset
|
11 UNDERLINE = mupdf.FZ_STEXT_UNDERLINE |
|
1d09e1dec1d9
ADD: PyMuPDF v1.26.4: the original sdist.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff
changeset
|
12 |
|
1d09e1dec1d9
ADD: PyMuPDF v1.26.4: the original sdist.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff
changeset
|
13 |
|
1d09e1dec1d9
ADD: PyMuPDF v1.26.4: the original sdist.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff
changeset
|
14 def test_4503(): |
|
1d09e1dec1d9
ADD: PyMuPDF v1.26.4: the original sdist.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff
changeset
|
15 """ |
|
1d09e1dec1d9
ADD: PyMuPDF v1.26.4: the original sdist.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff
changeset
|
16 Check that the text span with the specified text has the correct styling: |
|
1d09e1dec1d9
ADD: PyMuPDF v1.26.4: the original sdist.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff
changeset
|
17 strikeout, but no underline. |
|
1d09e1dec1d9
ADD: PyMuPDF v1.26.4: the original sdist.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff
changeset
|
18 Previously, the text was broken in multiple spans with span breaks at |
|
1d09e1dec1d9
ADD: PyMuPDF v1.26.4: the original sdist.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff
changeset
|
19 every space. and some parts were not detected as strikeout at all. |
|
1d09e1dec1d9
ADD: PyMuPDF v1.26.4: the original sdist.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff
changeset
|
20 """ |
|
1d09e1dec1d9
ADD: PyMuPDF v1.26.4: the original sdist.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff
changeset
|
21 scriptdir = os.path.dirname(os.path.abspath(__file__)) |
|
1d09e1dec1d9
ADD: PyMuPDF v1.26.4: the original sdist.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff
changeset
|
22 text = "the right to request the state to review and, if appropriate," |
|
1d09e1dec1d9
ADD: PyMuPDF v1.26.4: the original sdist.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff
changeset
|
23 filename = os.path.join(scriptdir, "resources", "test-4503.pdf") |
|
1d09e1dec1d9
ADD: PyMuPDF v1.26.4: the original sdist.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff
changeset
|
24 doc = pymupdf.open(filename) |
|
1d09e1dec1d9
ADD: PyMuPDF v1.26.4: the original sdist.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff
changeset
|
25 page = doc[0] |
|
1d09e1dec1d9
ADD: PyMuPDF v1.26.4: the original sdist.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff
changeset
|
26 flags = pymupdf.TEXT_ACCURATE_BBOXES | pymupdf.TEXT_COLLECT_STYLES |
|
1d09e1dec1d9
ADD: PyMuPDF v1.26.4: the original sdist.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff
changeset
|
27 spans = [ |
|
1d09e1dec1d9
ADD: PyMuPDF v1.26.4: the original sdist.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff
changeset
|
28 s |
|
1d09e1dec1d9
ADD: PyMuPDF v1.26.4: the original sdist.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff
changeset
|
29 for b in page.get_text("dict", flags=flags)["blocks"] |
|
1d09e1dec1d9
ADD: PyMuPDF v1.26.4: the original sdist.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff
changeset
|
30 for l in b["lines"] |
|
1d09e1dec1d9
ADD: PyMuPDF v1.26.4: the original sdist.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff
changeset
|
31 for s in l["spans"] |
|
1d09e1dec1d9
ADD: PyMuPDF v1.26.4: the original sdist.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff
changeset
|
32 if s["text"] == text |
|
1d09e1dec1d9
ADD: PyMuPDF v1.26.4: the original sdist.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff
changeset
|
33 ] |
|
1d09e1dec1d9
ADD: PyMuPDF v1.26.4: the original sdist.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff
changeset
|
34 assert spans, "No spans found with the specified text" |
|
1d09e1dec1d9
ADD: PyMuPDF v1.26.4: the original sdist.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff
changeset
|
35 span = spans[0] |
|
1d09e1dec1d9
ADD: PyMuPDF v1.26.4: the original sdist.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff
changeset
|
36 |
|
1d09e1dec1d9
ADD: PyMuPDF v1.26.4: the original sdist.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff
changeset
|
37 assert span["char_flags"] & STRIKEOUT |
|
1d09e1dec1d9
ADD: PyMuPDF v1.26.4: the original sdist.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff
changeset
|
38 assert not span["char_flags"] & UNDERLINE |
