view tests/test_rtl.py @ 26:a78c22e89a53

Use long Mercurial options mostly
author Franz Glasner <fzglas.hg@dom66.de>
date Fri, 19 Sep 2025 18:52:43 +0200
parents 1d09e1dec1d9
children
line wrap: on
line source

import pymupdf

import os


def test_rtl():
    """Check that no extracted word mixes RTL and non-RTL characters.

    Opens ``tests/resources/test-E+A.pdf`` (located relative to this file),
    extracts the words of the first page and asserts that each word string
    consists either entirely of characters from the RTL set below or of
    none of them.
    """
    path = os.path.normpath(f'{__file__}/../../tests/resources/test-E+A.pdf')

    # Characters this test treats as RTL: code points U+0590..U+0900 inclusive.
    # NOTE(review): the upper bound includes U+0900 itself - confirm that this
    # is intended and not an off-by-one.
    rtl_chars = {chr(i) for i in range(0x590, 0x901)}

    doc = pymupdf.open(path)
    try:
        page = doc[0]
        for w in page.get_text("words"):
            # Item 4 of each entry is the word string.
            text = w[4]
            # Every word string must either ONLY contain RTL chars ...
            only_rtl = rtl_chars.issuperset(text)
            # ... or NONE.
            no_rtl = rtl_chars.isdisjoint(text)
            assert only_rtl or no_rtl, (
                f"word mixes RTL and non-RTL characters: {text!r}"
            )
    finally:
        # Release the document even when an assertion fails.
        doc.close()