diff tests/test_textextract.py @ 39:a6bc019ac0b2 upstream

ADD: PyMuPDF v1.26.5: the original sdist.
author Franz Glasner <fzglas.hg@dom66.de>
date Sat, 11 Oct 2025 11:19:58 +0200
parents 1d09e1dec1d9
children
line wrap: on
line diff
--- a/tests/test_textextract.py	Mon Sep 15 11:43:07 2025 +0200
+++ b/tests/test_textextract.py	Sat Oct 11 11:19:58 2025 +0200
@@ -263,6 +263,10 @@
 
 
 def test_document_text():
+    if os.environ.get('PYODIDE_ROOT'):
+        print('test_document_text(): not running on Pyodide - multiprocessing not available.')
+        return
+        
     import platform
     import time
     
@@ -310,6 +314,9 @@
 
 
 def test_4524():
+    if os.environ.get('PYODIDE_ROOT'):
+        print('test_4524(): not running on Pyodide - multiprocessing not available.')
+        return
     path = os.path.abspath(f'{__file__}/../../tests/resources/mupdf_explored.pdf')
     print('')
     document = pymupdf.Document(path)
@@ -331,6 +338,11 @@
             for line in text.split('\n'):
                 print(f'    {line!r}')
             print('='*40)
+    wt = pymupdf.TOOLS.mupdf_warnings()
+    if pymupdf.mupdf_version_tuple < (1, 26, 8):
+        assert not wt
+    else:
+        assert wt == 'Actualtext with no position. Text may be lost or mispositioned.\n... repeated 2 times...'
 
 
 def test_3687():
@@ -375,12 +387,14 @@
     assert texts1 == texts0
 
     wt = pymupdf.TOOLS.mupdf_warnings()
-    if pymupdf.mupdf_version_tuple < (1, 27):
-        assert wt == 'Actualtext with no position. Text may be lost or mispositioned.\n... repeated 434 times...'
-    else:
+    if pymupdf.mupdf_version_tuple >= (1, 27):
         expected = 'format error: No common ancestor in structure tree\nstructure tree broken, assume tree is missing'
         expected = '\n'.join([expected] * 56)
         assert wt == expected
+    elif pymupdf.mupdf_version_tuple >= (1, 26, 8):
+        assert wt == 'Actualtext with no position. Text may be lost or mispositioned.\n... repeated 7684 times...'
+    else:
+        assert wt == 'Actualtext with no position. Text may be lost or mispositioned.\n... repeated 434 times...'
 
 def test_3650():
     path = os.path.normpath(f'{__file__}/../../tests/resources/test_3650.pdf')
@@ -878,6 +892,9 @@
     
     # This output is different from expected_1_23_5.
     expected_mupdf_1_26_1 = b'JOB No.: Shipper (complete name and address) \xe5\x8f\x91\xe8\xb4\xa7\xe4\xba\xba(\xe5\x90\x8d\xe7\xa7\xb0\xe5\x8f\x8a\xe5\x9c\xb0\xe5\x9d\x80)  Tel:                                  Fax: \n \nS/O No. \xe6\x89\x98\xe8\xbf\x90\xe5\x8d\x95\xe5\x8f\xb7\xe7\xa0\x81     \nSINORICH TRANSPORT LIMITED \nSHIPPING ORDER \n\xe6\x89\x98\xe8\xbf\x90\xe5\x8d\x95 \n \xe5\xb8\x82\xe5\x9c\xba\xe9\x83\xa8: \n88570009 \n88577019 \n88'.decode()
+
+    # Expected output for MuPDF >= 1.26.8 (despite the 1_27_0 name); differs from both expected_1_23_5 and expected_mupdf_1_26_1.
+    expected_mupdf_1_27_0 = b'JOB No.: \n \nS/O No. \xe6\x89\x98\xe8\xbf\x90\xe5\x8d\x95\xe5\x8f\xb7\xe7\xa0\x81   \nSINORICH TRANSPORT LIMITED \nSHIPPING ORDER \n\xe6\x89\x98\xe8\xbf\x90\xe5\x8d\x95 \n \xe5\xb8\x82\xe5\x9c\xba\xe9\x83\xa8: \n88570009 \n88577019 \n88572702 \n \xe6\x93\x8d\xe4\xbd\x9c\xe9\x83\xa8: \n88570008 \n88570004 \n \xe6\x96\x87\xe4\xbb\xb6\xe9\x83\xa8: \n88570003\n \nNotify Party(complete name and address, '.decode()
     
     print(f'expected_1_23_5\n{textwrap.indent(expected_1_23_5, "    ")}')
     print(f'expected_mupdf_1_26_1\n{textwrap.indent(expected_mupdf_1_26_1, "    ")}')
@@ -887,10 +904,16 @@
     print(f'{text=}')
     print(f'{text.encode()=}')
     
-    if pymupdf.mupdf_version_tuple >= (1, 26, 1):
+    wt = pymupdf.TOOLS.mupdf_warnings()
+    if pymupdf.mupdf_version_tuple >= (1, 26, 8):
+        assert text == expected_mupdf_1_27_0
+        assert wt == 'Actualtext with no position. Text may be lost or mispositioned.\n... repeated 120 times...'
+    elif pymupdf.mupdf_version_tuple >= (1, 26, 1):
         assert text == expected_mupdf_1_26_1
+        assert not wt
     else:
         print(f'No expected output for {pymupdf.mupdf_version_tuple=}')
+        assert not wt
 
 
 def test_4503():