comparison tests/test_tesseract.py @ 39:a6bc019ac0b2 upstream

ADD: PyMuPDF v1.26.5: the original sdist.
author Franz Glasner <fzglas.hg@dom66.de>
date Sat, 11 Oct 2025 11:19:58 +0200
parents 1d09e1dec1d9
children
comparison
equal deleted inserted replaced
2:b50eed0cc0ef 39:a6bc019ac0b2
22 # rebased. 22 # rebased.
23 if pymupdf.mupdf_version_tuple < (1, 25, 4): 23 if pymupdf.mupdf_version_tuple < (1, 25, 4):
24 tail = 'OCR initialisation failed' 24 tail = 'OCR initialisation failed'
25 else: 25 else:
26 tail = 'Tesseract language initialisation failed' 26 tail = 'Tesseract language initialisation failed'
27 e_expected = f'code=3: {tail}' 27 if os.environ.get('PYODIDE_ROOT'):
28 if platform.system() == 'OpenBSD': 28 e_expected = 'code=6: No OCR support in this build'
29 # 2023-12-12: For some reason the SWIG catch code only catches 29 e_expected_type = pymupdf.mupdf.FzErrorUnsupported
30 # the exception as FzErrorBase.
31 e_expected_type = pymupdf.mupdf.FzErrorBase
32 print(f'OpenBSD workaround - expecting FzErrorBase, not FzErrorLibrary.')
33 else: 30 else:
34 e_expected_type = pymupdf.mupdf.FzErrorLibrary 31 e_expected = f'code=3: {tail}'
32 if platform.system() == 'OpenBSD':
33 # 2023-12-12: For some reason the SWIG catch code only catches
34 # the exception as FzErrorBase.
35 e_expected_type = pymupdf.mupdf.FzErrorBase
36 print(f'OpenBSD workaround - expecting FzErrorBase, not FzErrorLibrary.')
37 else:
38 e_expected_type = pymupdf.mupdf.FzErrorLibrary
35 else: 39 else:
36 # classic. 40 # classic.
37 e_expected = 'OCR initialisation failed' 41 e_expected = 'OCR initialisation failed'
38 e_expected_type = None 42 e_expected_type = None
39 tessdata_prefix = os.environ.get('TESSDATA_PREFIX') 43 tessdata_prefix = os.environ.get('TESSDATA_PREFIX')
69 def test_3842b(): 73 def test_3842b():
70 # Check Tesseract failure when given a bogus languages. 74 # Check Tesseract failure when given a bogus languages.
71 # 75 #
72 # Note that Tesseract seems to output its own diagnostics. 76 # Note that Tesseract seems to output its own diagnostics.
73 # 77 #
78 if os.environ.get('PYODIDE_ROOT'):
79 print('test_3842b(): not running on Pyodide - cannot run child processes.')
80 return
81
74 path = os.path.normpath(f'{__file__}/../../tests/resources/test_3842.pdf') 82 path = os.path.normpath(f'{__file__}/../../tests/resources/test_3842.pdf')
75 with pymupdf.open(path) as document: 83 with pymupdf.open(path) as document:
76 page = document[6] 84 page = document[6]
77 try: 85 try:
78 partial_tp = page.get_textpage_ocr(flags=0, full=False, language='qwerty') 86 partial_tp = page.get_textpage_ocr(flags=0, full=False, language='qwerty')
89 else: 97 else:
90 assert 'Tesseract language initialisation failed' in str(e) 98 assert 'Tesseract language initialisation failed' in str(e)
91 99
92 100
93 def test_3842(): 101 def test_3842():
102 if os.environ.get('PYODIDE_ROOT'):
103 print('test_3842(): not running on Pyodide - cannot run child processes.')
104 return
105
94 path = os.path.normpath(f'{__file__}/../../tests/resources/test_3842.pdf') 106 path = os.path.normpath(f'{__file__}/../../tests/resources/test_3842.pdf')
95 with pymupdf.open(path) as document: 107 with pymupdf.open(path) as document:
96 page = document[6] 108 page = document[6]
97 try: 109 try:
98 partial_tp = page.get_textpage_ocr(flags=0, full=False) 110 partial_tp = page.get_textpage_ocr(flags=0, full=False)