diff tests/test_textsearch.py @ 1:1d09e1dec1d9 upstream

ADD: PyMuPDF v1.26.4: the original sdist. It does not yet contain MuPDF. This normally will be downloaded when building PyMuPDF.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:37:51 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test_textsearch.py	Mon Sep 15 11:37:51 2025 +0200
@@ -0,0 +1,52 @@
+"""
+"test_search1":
+Search for some text on a PDF page, and compare content of returned hit
+rectangle with the searched text.
+
+"test_search2":
+Text search with 'clip' parameter - clip rectangle contains two occurrences
+of searched text. Confirm search locations are inside clip.
+"""
+
+import os
+
+import pymupdf
+
+scriptdir = os.path.abspath(os.path.dirname(__file__))
+filename1 = os.path.join(scriptdir, "resources", "2.pdf")
+filename2 = os.path.join(scriptdir, "resources", "github_sample.pdf")
+filename3 = os.path.join(scriptdir, "resources", "text-find-ligatures.pdf")
+
+
+def test_search1():
+    doc = pymupdf.open(filename1)
+    page = doc[0]
+    needle = "mupdf"
+    rlist = page.search_for(needle)
+    assert rlist != []
+    for rect in rlist:
+        assert needle in page.get_textbox(rect).lower()
+
+
+def test_search2():
+    doc = pymupdf.open(filename2)
+    page = doc[0]
+    needle = "the"
+    clip = pymupdf.Rect(40.5, 228.31436157226562, 346.5226135253906, 239.5338592529297)
+    rl = page.search_for(needle, clip=clip)
+    assert len(rl) == 2
+    for r in rl:
+        assert r in clip
+
+
+def test_search3():
+    """Ensure we find text whether or not it contains ligatures."""
+    doc = pymupdf.open(filename3)
+    page = doc[0]
+    needle = "flag"
+    hits = page.search_for(needle, flags=pymupdf.TEXTFLAGS_SEARCH)
+    assert len(hits) == 2  # all occurrences found
+    hits = page.search_for(
+        needle, flags=pymupdf.TEXTFLAGS_SEARCH | pymupdf.TEXT_PRESERVE_LIGATURES
+    )
+    assert len(hits) == 1  # only found text without ligatures