Mercurial > hgrepos > Python2 > PyMuPDF
comparison tests/test_word_delimiters.py @ 1:1d09e1dec1d9 upstream
ADD: PyMuPDF v1.26.4: the original sdist.
It does not yet contain MuPDF, which is normally downloaded when
building PyMuPDF.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:37:51 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 1:1d09e1dec1d9 |
|---|---|
| 1 import pymupdf | |
| 2 import string | |
| 3 | |
| 4 | |
| 5 def test_delimiters(): | |
| 6 """Test changing word delimiting characters.""" | |
| 7 doc = pymupdf.open() | |
| 8 page = doc.new_page() | |
| 9 text = "word1,word2 - word3. word4?word5." | |
| 10 page.insert_text((50, 50), text) | |
| 11 | |
| 12 # Standard words extraction: | |
| 13 # only spaces and line breaks start a new word | |
| 14 words0 = [w[4] for w in page.get_text("words")] | |
| 15 assert words0 == ["word1,word2", "-", "word3.", "word4?word5."] | |
| 16 | |
| 17 # extract words again, this time treating punctuation characters as word delimiters | |
| 18 words1 = [w[4] for w in page.get_text("words", delimiters=string.punctuation)] | |
| 19 assert words0 != words1 | |
| 20 assert " ".join(words1) == "word1 word2 word3 word4 word5" | |
| 21 | |
| 22 # confirm that extraction without delimiters still yields the original words | |
| 23 assert [w[4] for w in page.get_text("words")] == words0 |
