Mercurial > hgrepos > Python2 > PyMuPDF
diff tests/test_word_delimiters.py @ 1:1d09e1dec1d9 upstream
ADD: PyMuPDF v1.26.4: the original sdist.
It does not yet contain MuPDF. This normally will be downloaded when
building PyMuPDF.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:37:51 +0200 |
| parents | |
| children | |
line wrap: on
line diff
import string

import pymupdf


def test_delimiters():
    """Verify that word extraction honors custom delimiter characters.

    A single line of text is written to a fresh page and extracted three
    times: with default settings, with ``string.punctuation`` passed as
    ``delimiters``, and with default settings again to confirm that the
    custom delimiters did not persist beyond the call that used them.
    """
    sample = "word1,word2 - word3. word4?word5."
    document = pymupdf.open()
    sheet = document.new_page()
    sheet.insert_text((50, 50), sample)

    def word_strings(**options):
        # Item 4 of every "words" entry is the value compared against the
        # expected word strings below.
        return [entry[4] for entry in sheet.get_text("words", **options)]

    # Default extraction: only spaces and line breaks start a new word,
    # so punctuation stays attached to its neighbours.
    default_words = word_strings()
    assert default_words == ["word1,word2", "-", "word3.", "word4?word5."]

    # Extraction with punctuation characters acting as extra delimiters.
    split_words = word_strings(delimiters=string.punctuation)
    assert default_words != split_words
    assert " ".join(split_words) == "word1 word2 word3 word4 word5"

    # A later call without delimiters must match the first extraction.
    assert word_strings() == default_words
