Mercurial > hgrepos > Python2 > PyMuPDF
comparison tests/test_textsearch.py @ 1:1d09e1dec1d9 upstream
ADD: PyMuPDF v1.26.4: the original sdist.
It does not yet contain MuPDF. This normally will be downloaded when
building PyMuPDF.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:37:51 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 1:1d09e1dec1d9 |
|---|---|
| 1 """ | |
| 2 "test_search1": | |
| 3 Search for some text on a PDF page, and compare content of returned hit | |
| 4 rectangle with the searched text. | |
| 5 | |
| 6 "test_search2": | |
| 7 Text search with 'clip' parameter - clip rectangle contains two occurrences | |
| 8 of searched text. Confirm search locations are inside clip. | |
| 9 """ | |
| 10 | |
| 11 import os | |
| 12 | |
| 13 import pymupdf | |
| 14 | |
# Absolute directory containing this test module; sample PDFs live in
# its "resources" subdirectory.
scriptdir = os.path.abspath(os.path.dirname(__file__))
filename1, filename2, filename3 = (
    os.path.join(scriptdir, "resources", pdf_name)
    for pdf_name in ("2.pdf", "github_sample.pdf", "text-find-ligatures.pdf")
)
| 19 | |
| 20 | |
def test_search1():
    """Search the first page of ``filename1`` and verify each hit's content.

    For every rectangle returned by ``Page.search_for``, the text extracted
    from that rectangle (lower-cased) must contain the search string.
    """
    needle = "mupdf"
    # Use the document as a context manager so it is closed even when an
    # assertion fails part-way through the test.
    with pymupdf.open(filename1) as doc:
        page = doc[0]
        rlist = page.search_for(needle)
        assert rlist, "expected at least one hit"
        for rect in rlist:
            assert needle in page.get_textbox(rect).lower()
| 29 | |
| 30 | |
def test_search2():
    """Search with a ``clip`` rectangle and verify the hits lie inside it.

    The search on the first page of ``filename2`` is restricted to ``clip``;
    exactly two hits are expected and each hit rectangle must be contained
    in the clip rectangle.
    """
    needle = "the"
    clip = pymupdf.Rect(40.5, 228.31436157226562, 346.5226135253906, 239.5338592529297)
    # Use the document as a context manager so it is closed even when an
    # assertion fails part-way through the test.
    with pymupdf.open(filename2) as doc:
        page = doc[0]
        rl = page.search_for(needle, clip=clip)
        assert len(rl) == 2
        for r in rl:
            assert r in clip  # hit rectangle must be inside the clip
| 40 | |
| 41 | |
def test_search3():
    """Ensure we find text whether or not it contains ligatures."""
    needle = "flag"
    # Use the document as a context manager so it is closed even when an
    # assertion fails part-way through the test.
    with pymupdf.open(filename3) as doc:
        page = doc[0]

        # With TEXTFLAGS_SEARCH alone, both occurrences are expected.
        all_hits = page.search_for(needle, flags=pymupdf.TEXTFLAGS_SEARCH)
        assert len(all_hits) == 2  # all occurrences found

        # With TEXT_PRESERVE_LIGATURES added, only one hit is expected.
        preserved_hits = page.search_for(
            needle, flags=pymupdf.TEXTFLAGS_SEARCH | pymupdf.TEXT_PRESERVE_LIGATURES
        )
        assert len(preserved_hits) == 1  # only found text without ligatures
