Mercurial > hgrepos > Python2 > PyMuPDF
diff tests/test_textextract.py @ 39:a6bc019ac0b2 upstream
ADD: PyMuPDF v1.26.5: the original sdist.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Sat, 11 Oct 2025 11:19:58 +0200 |
| parents | 1d09e1dec1d9 |
| children | |
line wrap: on
line diff
--- a/tests/test_textextract.py Mon Sep 15 11:43:07 2025 +0200 +++ b/tests/test_textextract.py Sat Oct 11 11:19:58 2025 +0200 @@ -263,6 +263,10 @@ def test_document_text(): + if os.environ.get('PYODIDE_ROOT'): + print('test_document_text(): not running on Pyodide - multiprocessing not available.') + return + import platform import time @@ -310,6 +314,9 @@ def test_4524(): + if os.environ.get('PYODIDE_ROOT'): + print('test_4524(): not running on Pyodide - multiprocessing not available.') + return path = os.path.abspath(f'{__file__}/../../tests/resources/mupdf_explored.pdf') print('') document = pymupdf.Document(path) @@ -331,6 +338,11 @@ for line in text.split('\n'): print(f' {line!r}') print('='*40) + wt = pymupdf.TOOLS.mupdf_warnings() + if pymupdf.mupdf_version_tuple < (1, 26, 8): + assert not wt + else: + assert wt == 'Actualtext with no position. Text may be lost or mispositioned.\n... repeated 2 times...' def test_3687(): @@ -375,12 +387,14 @@ assert texts1 == texts0 wt = pymupdf.TOOLS.mupdf_warnings() - if pymupdf.mupdf_version_tuple < (1, 27): - assert wt == 'Actualtext with no position. Text may be lost or mispositioned.\n... repeated 434 times...' - else: + if pymupdf.mupdf_version_tuple >= (1, 27): expected = 'format error: No common ancestor in structure tree\nstructure tree broken, assume tree is missing' expected = '\n'.join([expected] * 56) assert wt == expected + elif pymupdf.mupdf_version_tuple >= (1, 26, 8): + assert wt == 'Actualtext with no position. Text may be lost or mispositioned.\n... repeated 7684 times...' + else: + assert wt == 'Actualtext with no position. Text may be lost or mispositioned.\n... repeated 434 times...' def test_3650(): path = os.path.normpath(f'{__file__}/../../tests/resources/test_3650.pdf') @@ -878,6 +892,9 @@ # This output is different from expected_1_23_5. 
expected_mupdf_1_26_1 = b'JOB No.: Shipper (complete name and address) \xe5\x8f\x91\xe8\xb4\xa7\xe4\xba\xba(\xe5\x90\x8d\xe7\xa7\xb0\xe5\x8f\x8a\xe5\x9c\xb0\xe5\x9d\x80) Tel: Fax: \n \nS/O No. \xe6\x89\x98\xe8\xbf\x90\xe5\x8d\x95\xe5\x8f\xb7\xe7\xa0\x81 \nSINORICH TRANSPORT LIMITED \nSHIPPING ORDER \n\xe6\x89\x98\xe8\xbf\x90\xe5\x8d\x95 \n \xe5\xb8\x82\xe5\x9c\xba\xe9\x83\xa8: \n88570009 \n88577019 \n88'.decode() + + # This output is different from either of the two expected strings. + expected_mupdf_1_27_0 = b'JOB No.: \n \nS/O No. \xe6\x89\x98\xe8\xbf\x90\xe5\x8d\x95\xe5\x8f\xb7\xe7\xa0\x81 \nSINORICH TRANSPORT LIMITED \nSHIPPING ORDER \n\xe6\x89\x98\xe8\xbf\x90\xe5\x8d\x95 \n \xe5\xb8\x82\xe5\x9c\xba\xe9\x83\xa8: \n88570009 \n88577019 \n88572702 \n \xe6\x93\x8d\xe4\xbd\x9c\xe9\x83\xa8: \n88570008 \n88570004 \n \xe6\x96\x87\xe4\xbb\xb6\xe9\x83\xa8: \n88570003\n \nNotify Party(complete name and address, '.decode() print(f'expected_1_23_5\n{textwrap.indent(expected_1_23_5, " ")}') print(f'expected_mupdf_1_26_1\n{textwrap.indent(expected_mupdf_1_26_1, " ")}') @@ -887,10 +904,16 @@ print(f'{text=}') print(f'{text.encode()=}') - if pymupdf.mupdf_version_tuple >= (1, 26, 1): + wt = pymupdf.TOOLS.mupdf_warnings() + if pymupdf.mupdf_version_tuple >= (1, 26, 8): + assert text == expected_mupdf_1_27_0 + assert wt == 'Actualtext with no position. Text may be lost or mispositioned.\n... repeated 120 times...' + elif pymupdf.mupdf_version_tuple >= (1, 26, 1): assert text == expected_mupdf_1_26_1 + assert not wt else: print(f'No expected output for {pymupdf.mupdf_version_tuple=}') + assert not wt def test_4503():
