Mercurial > hgrepos > Python2 > PyMuPDF
annotate tests/test_linebreaks.py @ 34:da085c7f52c6
FIX: In Makerules: check for empty EXTRA_CHECKS also before applying them
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Sun, 21 Sep 2025 18:09:12 +0200 |
| parents | 1d09e1dec1d9 |
| children |
| rev | line source |
|---|---|
|
1
1d09e1dec1d9
ADD: PyMuPDF v1.26.4: the original sdist.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff
changeset
|
1 import pymupdf |
|
1d09e1dec1d9
ADD: PyMuPDF v1.26.4: the original sdist.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff
changeset
|
2 |
|
1d09e1dec1d9
ADD: PyMuPDF v1.26.4: the original sdist.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff
changeset
|
3 import os.path |
|
1d09e1dec1d9
ADD: PyMuPDF v1.26.4: the original sdist.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff
changeset
|
4 |
|
1d09e1dec1d9
ADD: PyMuPDF v1.26.4: the original sdist.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff
changeset
|
5 |
|
1d09e1dec1d9
ADD: PyMuPDF v1.26.4: the original sdist.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff
changeset
|
6 def test_linebreaks(): |
|
1d09e1dec1d9
ADD: PyMuPDF v1.26.4: the original sdist.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff
changeset
|
7 """Test avoidance of linebreaks.""" |
|
1d09e1dec1d9
ADD: PyMuPDF v1.26.4: the original sdist.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff
changeset
|
8 path = os.path.abspath(f"{__file__}/../../tests/resources/test-linebreaks.pdf") |
|
1d09e1dec1d9
ADD: PyMuPDF v1.26.4: the original sdist.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff
changeset
|
9 doc = pymupdf.open(path) |
|
1d09e1dec1d9
ADD: PyMuPDF v1.26.4: the original sdist.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff
changeset
|
10 page = doc[0] |
|
1d09e1dec1d9
ADD: PyMuPDF v1.26.4: the original sdist.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff
changeset
|
11 tp = page.get_textpage(flags=pymupdf.TEXTFLAGS_WORDS) |
|
1d09e1dec1d9
ADD: PyMuPDF v1.26.4: the original sdist.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff
changeset
|
12 word_count = len(page.get_text("words", textpage=tp)) |
|
1d09e1dec1d9
ADD: PyMuPDF v1.26.4: the original sdist.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff
changeset
|
13 line_count1 = len(page.get_text(textpage=tp).splitlines()) |
|
1d09e1dec1d9
ADD: PyMuPDF v1.26.4: the original sdist.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff
changeset
|
14 line_count2 = len(page.get_text(sort=True, textpage=tp).splitlines()) |
|
1d09e1dec1d9
ADD: PyMuPDF v1.26.4: the original sdist.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff
changeset
|
15 assert word_count == line_count1 |
|
1d09e1dec1d9
ADD: PyMuPDF v1.26.4: the original sdist.
Franz Glasner <fzglas.hg@dom66.de>
parents:
diff
changeset
|
16 assert line_count2 < line_count1 / 2 |
