Mercurial > hgrepos > Python2 > PyMuPDF
comparison tests/test_tesseract.py @ 39:a6bc019ac0b2 upstream
ADD: PyMuPDF v1.26.5: the original sdist.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Sat, 11 Oct 2025 11:19:58 +0200 |
| parents | 1d09e1dec1d9 |
| children |
comparison
equal
deleted
inserted
replaced
| 2:b50eed0cc0ef | 39:a6bc019ac0b2 |
|---|---|
| 22 # rebased. | 22 # rebased. |
| 23 if pymupdf.mupdf_version_tuple < (1, 25, 4): | 23 if pymupdf.mupdf_version_tuple < (1, 25, 4): |
| 24 tail = 'OCR initialisation failed' | 24 tail = 'OCR initialisation failed' |
| 25 else: | 25 else: |
| 26 tail = 'Tesseract language initialisation failed' | 26 tail = 'Tesseract language initialisation failed' |
| 27 e_expected = f'code=3: {tail}' | 27 if os.environ.get('PYODIDE_ROOT'): |
| 28 if platform.system() == 'OpenBSD': | 28 e_expected = 'code=6: No OCR support in this build' |
| 29 # 2023-12-12: For some reason the SWIG catch code only catches | 29 e_expected_type = pymupdf.mupdf.FzErrorUnsupported |
| 30 # the exception as FzErrorBase. | |
| 31 e_expected_type = pymupdf.mupdf.FzErrorBase | |
| 32 print(f'OpenBSD workaround - expecting FzErrorBase, not FzErrorLibrary.') | |
| 33 else: | 30 else: |
| 34 e_expected_type = pymupdf.mupdf.FzErrorLibrary | 31 e_expected = f'code=3: {tail}' |
| 32 if platform.system() == 'OpenBSD': | |
| 33 # 2023-12-12: For some reason the SWIG catch code only catches | |
| 34 # the exception as FzErrorBase. | |
| 35 e_expected_type = pymupdf.mupdf.FzErrorBase | |
| 36 print(f'OpenBSD workaround - expecting FzErrorBase, not FzErrorLibrary.') | |
| 37 else: | |
| 38 e_expected_type = pymupdf.mupdf.FzErrorLibrary | |
| 35 else: | 39 else: |
| 36 # classic. | 40 # classic. |
| 37 e_expected = 'OCR initialisation failed' | 41 e_expected = 'OCR initialisation failed' |
| 38 e_expected_type = None | 42 e_expected_type = None |
| 39 tessdata_prefix = os.environ.get('TESSDATA_PREFIX') | 43 tessdata_prefix = os.environ.get('TESSDATA_PREFIX') |
| 69 def test_3842b(): | 73 def test_3842b(): |
| 70 # Check Tesseract failure when given a bogus language. | 74 # Check Tesseract failure when given a bogus language. |
| 71 # | 75 # |
| 72 # Note that Tesseract seems to output its own diagnostics. | 76 # Note that Tesseract seems to output its own diagnostics. |
| 73 # | 77 # |
| 78 if os.environ.get('PYODIDE_ROOT'): | |
| 79 print('test_3842b(): not running on Pyodide - cannot run child processes.') | |
| 80 return | |
| 81 | |
| 74 path = os.path.normpath(f'{__file__}/../../tests/resources/test_3842.pdf') | 82 path = os.path.normpath(f'{__file__}/../../tests/resources/test_3842.pdf') |
| 75 with pymupdf.open(path) as document: | 83 with pymupdf.open(path) as document: |
| 76 page = document[6] | 84 page = document[6] |
| 77 try: | 85 try: |
| 78 partial_tp = page.get_textpage_ocr(flags=0, full=False, language='qwerty') | 86 partial_tp = page.get_textpage_ocr(flags=0, full=False, language='qwerty') |
| 89 else: | 97 else: |
| 90 assert 'Tesseract language initialisation failed' in str(e) | 98 assert 'Tesseract language initialisation failed' in str(e) |
| 91 | 99 |
| 92 | 100 |
| 93 def test_3842(): | 101 def test_3842(): |
| 102 if os.environ.get('PYODIDE_ROOT'): | |
| 103 print('test_3842(): not running on Pyodide - cannot run child processes.') | |
| 104 return | |
| 105 | |
| 94 path = os.path.normpath(f'{__file__}/../../tests/resources/test_3842.pdf') | 106 path = os.path.normpath(f'{__file__}/../../tests/resources/test_3842.pdf') |
| 95 with pymupdf.open(path) as document: | 107 with pymupdf.open(path) as document: |
| 96 page = document[6] | 108 page = document[6] |
| 97 try: | 109 try: |
| 98 partial_tp = page.get_textpage_ocr(flags=0, full=False) | 110 partial_tp = page.get_textpage_ocr(flags=0, full=False) |
