view tests/test_rtl.py @ 46:7ee69f120f19 default tip

>>>>> tag v1.26.5+1 for changeset b74429b0f5c4
author Franz Glasner <fzglas.hg@dom66.de>
date Sat, 11 Oct 2025 17:17:30 +0200
parents 1d09e1dec1d9
children
line wrap: on
line source

import pymupdf

import os


def test_rtl():
    """Check that no extracted word mixes RTL and non-RTL characters.

    Opens ``tests/resources/test-E+A.pdf`` (resolved relative to this file),
    extracts the words of the first page and asserts that every word string
    consists either entirely of characters from the RTL range used below,
    or of none of them.
    """
    path = os.path.normpath(f'{__file__}/../../tests/resources/test-E+A.pdf')
    # Code points this test treats as RTL: U+0590 through U+0900 inclusive
    # (the range end 0x901 is exclusive).
    rtl_chars = {chr(i) for i in range(0x590, 0x901)}

    # The context manager closes the document even if an assertion fails.
    with pymupdf.open(path) as doc:
        page = doc[0]
        for w in page.get_text("words"):
            text = w[4]  # the word string of the extracted item
            # every word string must either ONLY contain RTL chars ...
            only_rtl = rtl_chars.issuperset(text)
            # ... or NONE.
            no_rtl = rtl_chars.isdisjoint(text)
            assert only_rtl or no_rtl, (
                f"word mixes RTL and non-RTL characters: {text!r}"
            )