Mercurial > hgrepos > Python2 > PyMuPDF
comparison tests/test_font.py @ 1:1d09e1dec1d9 upstream
ADD: PyMuPDF v1.26.4: the original sdist.
It does not yet contain MuPDF. This normally will be downloaded when
building PyMuPDF.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:37:51 +0200 |
| parents | |
| children | a6bc019ac0b2 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 1:1d09e1dec1d9 |
|---|---|
| 1 """ | |
| 2 Tests for the Font class. | |
| 3 """ | |
| 4 import os | |
| 5 import platform | |
| 6 import pymupdf | |
| 7 import subprocess | |
| 8 import textwrap | |
| 9 | |
| 10 import util | |
| 11 | |
| 12 | |
def test_font1():
    """Check metrics, codepoints and bbox of the built-in "helv" font.

    Verifies that:
    * the font reports the name "Helvetica";
    * the per-character lengths sum up to the total text length and each
      equals the glyph advance scaled by the font size (20);
    * a font re-created from the first font's buffer reports the same
      valid codepoints;
    * on builds exposing `pymupdf.mupdf`, `font.bbox` equals the value
      returned by `font.this.fz_font_bbox()`.
    """
    text = "PyMuPDF"
    font = pymupdf.Font("helv")
    assert font.name == "Helvetica"
    tl = font.text_length(text, fontsize=20)
    cl = font.char_lengths(text, fontsize=20)
    assert len(text) == len(cl)
    assert abs(sum(cl) - tl) < pymupdf.EPSILON
    # Lengths were asserted equal above, so zip() pairs every character
    # with its own length.
    for char, length in zip(text, cl):
        assert length == font.glyph_advance(ord(char)) * 20
    font2 = pymupdf.Font(fontbuffer=font.buffer)
    codepoints1 = font.valid_codepoints()
    codepoints2 = font2.valid_codepoints()
    print('')
    print(f'{len(codepoints1)=}')
    print(f'{len(codepoints2)=}')
    if 0:
        # Disabled debugging aid: list both codepoint sequences side by side
        # and mark entries that differ with "*".
        for i, (ucs1, ucs2) in enumerate(zip(codepoints1, codepoints2)):
            print(f' {i}: {ucs1=} {ucs2=} {"" if ucs1==ucs2 else "*"}')
    assert font2.valid_codepoints() == font.valid_codepoints()

    # Also check we can get font's bbox.
    bbox1 = font.bbox
    print(f'{bbox1=}')
    if hasattr(pymupdf, 'mupdf'):
        bbox2 = font.this.fz_font_bbox()
        assert bbox2 == bbox1
| 40 | |
| 41 | |
def test_font2():
    """`Font.text_length()` and `pymupdf.get_text_length()` must agree."""
    sample = "PyMuPDF"
    helv = pymupdf.Font("helv")
    length_from_font = helv.text_length(sample)
    length_from_module = pymupdf.get_text_length(sample)
    assert length_from_font == length_from_module
| 47 | |
| 48 | |
def test_fontname():
    """Assert a valid PDF fontname.

    Inserting a font with default arguments must succeed, while a fontname
    containing an illegal character must raise `ValueError` whose message
    starts with "bad fontname chars".
    """
    # Use a context manager so the new document is closed even if an
    # assertion fails.
    with pymupdf.open() as doc:
        page = doc.new_page()
        assert page.insert_font()  # assert: a valid fontname works!
        detected = False  # preset indicator
        try:  # fontname check will fail first - don't need a font at all here
            page.insert_font(fontname="illegal/char", fontfile="unimportant")
        except ValueError as e:
            # Only the expected message counts as "illegal fontname detected".
            detected = str(e).startswith("bad fontname chars")
        assert detected
| 61 | |
def test_2608():
    """Compare text of block 10 on page 0 of 2201.00069.pdf with a reference.

    The extracted text is also written to `tests/test_2608_out`. The
    reference file depends on the MuPDF version: `test_2608_expected` for
    MuPDF >= 1.27, otherwise `test_2608_expected_1.26`.
    """
    flags = pymupdf.TEXT_DEHYPHENATE | pymupdf.TEXT_MEDIABOX_CLIP
    path_pdf = os.path.abspath(f'{__file__}/../../tests/resources/2201.00069.pdf')
    path_out = os.path.abspath(f'{__file__}/../../tests/test_2608_out')
    path_expected = os.path.normpath(f'{__file__}/../../tests/resources/test_2608_expected')
    path_expected_1_26 = os.path.normpath(f'{__file__}/../../tests/resources/test_2608_expected_1.26')
    with pymupdf.open(path_pdf) as doc:
        # Item 4 of a text block tuple is the block's text.
        text = doc[0].get_text_blocks(flags=flags)[10][4]
        with open(path_out, 'wb') as f:
            f.write(text.encode('utf8'))
        path_expected2 = (
            path_expected
            if pymupdf.mupdf_version_tuple >= (1, 27)
            else path_expected_1_26
        )
        with open(path_expected2, 'rb') as f:
            # Github windows x32 seems to insert \r characters; maybe
            # something to do with the Python installation's line endings
            # settings.
            expected = f.read().decode('utf8').replace('\r', '')
        print(f'test_2608(): {text.encode("utf8")=}')
        print(f'test_2608(): {expected.encode("utf8")=}')
        assert text == expected
| 84 | |
def test_fontarchive():
    """Check the archive entries created by `css_for_pymupdf_font("notos")`.

    After the call, the archive's `entry_list` must consist of a single
    'tree' entry listing the four "notos" font variants.
    """
    arch = pymupdf.Archive()
    css = pymupdf.css_for_pymupdf_font("notos", archive=arch, name="sans-serif")
    print(css)
    print(arch.entry_list)
    assert arch.entry_list == [
        {
            'fmt': 'tree',
            'entries': ['notosbo', 'notosbi', 'notosit', 'notos'],
            'path': None,
        },
    ]
| 102 | |
def test_load_system_font():
    """Check that installed system-font callbacks are invoked by MuPDF.

    Three recording callbacks are installed with
    `fz_install_load_system_font_funcs()`. Loading "some-font-name" must
    result in exactly one recorded call carrying the requested arguments.
    Skipped when `pymupdf.mupdf` is not available.
    """
    if not hasattr(pymupdf, 'mupdf'):
        print('test_load_system_font(): Not running on classic.')
        return

    # Every callback records its arguments here and returns None.
    trace = []

    def on_load_font(name, bold, italic, needs_exact_metrics):
        trace.append((name, bold, italic, needs_exact_metrics))
        return None

    def on_load_cjk_font(name, ordering, serif):
        trace.append((name, ordering, serif))
        return None

    def on_load_fallback_font(script, language, serif, bold, italic):
        trace.append((script, language, serif, bold, italic))
        return None

    pymupdf.mupdf.fz_install_load_system_font_funcs(
        on_load_font,
        on_load_cjk_font,
        on_load_fallback_font,
    )
    f = pymupdf.mupdf.fz_load_system_font("some-font-name", 0, 0, 0)
    expected_trace = [('some-font-name', 0, 0, 0)]
    assert trace == expected_trace, f'Incorrect {trace=}.'
    print(f'test_load_system_font(): {f.m_internal=}')
| 126 | |
| 127 | |
def test_mupdf_subset_fonts2():
    """Call `pdf_subset_fonts2()` on every other page of 2.pdf.

    Only checks that the call completes. Skipped when `pymupdf.mupdf` is
    not available.
    """
    if not hasattr(pymupdf, 'mupdf'):
        print('Not running on rebased.')
        return
    path = os.path.abspath(f'{__file__}/../../tests/resources/2.pdf')
    with pymupdf.open(path) as doc:
        page_count = len(doc)
        # Even page numbers 0, 2, 4, ... - one per complete pair of pages.
        pages = list(range(0, 2 * (page_count // 2), 2))
        print(f'{pages=}.')
        pdf = pymupdf._as_pdf_document(doc)
        pymupdf.mupdf.pdf_subset_fonts2(pdf, pages)
| 138 | |
| 139 | |
def test_3677():
    """Check span font names of test_3677.pdf with subset fontnames enabled.

    `TOOLS.set_subset_fontnames(True)` is active for the duration of the
    test and switched back to False afterwards, even on failure.
    """
    pymupdf.TOOLS.set_subset_fontnames(True)
    try:
        path = os.path.abspath(f'{__file__}/../../tests/resources/test_3677.pdf')
        font_names_expected = [
            'BCDEEE+Aptos',
            'BCDFEE+Aptos',
            'BCDGEE+Calibri-Light',
            'BCDHEE+Calibri-Light',
        ]
        font_names = []
        with pymupdf.open(path) as document:
            for page in document:
                for block in page.get_text('dict')['blocks']:
                    # Only blocks of type 0 that carry lines are inspected.
                    if block['type'] != 0 or 'lines' not in block:
                        continue
                    for line in block['lines']:
                        for span in line['spans']:
                            font_name = span['font']
                            print(font_name)
                            font_names.append(font_name)
        assert font_names == font_names_expected, f'{font_names=}'
    finally:
        pymupdf.TOOLS.set_subset_fontnames(False)
| 164 | |
| 165 | |
def test_3933():
    """Check the number of valid codepoints of the fonts on page 0.

    For each font of the first page of test_3933.pdf whose extracted buffer
    is non-empty, a `pymupdf.Font` is built from that buffer and the number
    of its valid codepoints is compared with the expected value. A font name
    missing from the table fails the comparison, because the lookup then
    yields None.
    """
    expected = {
        'BCDEEE+Calibri': 39,
        'BCDFEE+SwissReSan-Regu': 53,
        'BCDGEE+SwissReSan-Ital': 20,
        'BCDHEE+SwissReSan-Bold': 20,
        'BCDIEE+SwissReSan-Regu': 53,
        'BCDJEE+Calibri': 39,
    }
    path = os.path.normpath(f'{__file__}/../../tests/resources/test_3933.pdf')
    with pymupdf.open(path) as document:
        page = document[0]
        print(f'{len(page.get_fonts())=}')

        for xref, _, _, name, _, _ in page.get_fonts():
            _, _, _, content = document.extract_font(xref)
            if not content:
                # Nothing was extracted for this font - skip it.
                continue
            font = pymupdf.Font(fontname=name, fontbuffer=content)
            supported_symbols = font.valid_codepoints()
            print(f'Font {name}: {len(supported_symbols)=}.', flush=True)
            assert len(supported_symbols) == expected.get(name)
| 189 | |
| 190 | |
def test_3780():
    """Print font metrics and first-page span details of test_3780.pdf.

    Contains no assertions; it passes as long as every font buffer can be
    loaded and the text dictionary can be walked without raising.
    """
    # NOTE(review): the first-page dump is taken to run once per page, after
    # that page's font loop - confirm the nesting against the original file.
    path = os.path.normpath(f'{__file__}/../../tests/resources/test_3780.pdf')
    with pymupdf.open(path) as document:
        for page_i, page in enumerate(document):
            for font_item in page.get_fonts():
                xref = font_item[0]
                # The font buffer is the last item returned by extract_font().
                buff = document.extract_font(xref)[-1]
                font = pymupdf.Font(fontbuffer=buff)
                print(f'{page_i=}: xref {xref} {font.name=} {font.ascender=} {font.descender=}.')
            if page_i != 0:
                continue
            # Dump every span of the first page only.
            d = page.get_text('dict')
            for block_no, block in enumerate(d['blocks']):
                print(f'block {block_no}:')
                for line_no, line in enumerate(block['lines']):
                    print(f' line {line_no}:')
                    for span_no, span in enumerate(line['spans']):
                        print(f' span {span_no}:')
                        for key, value in span.items():
                            print(f' {key}: {value!r}')
| 211 | |
| 212 | |
def test_3887():
    """Check extracted characters after subsetting fonts of test_3887.pdf.

    The document is subsetted with `fallback=False`, saved with `ez_save()`
    and reopened. The set of characters extracted from page 0 must equal the
    set of characters in the reference text. A pixmap of the page is saved
    next to the input file for inspection.
    """
    print(f'{pymupdf.version=}')
    path = os.path.normpath(f'{__file__}/../../tests/resources/test_3887.pdf')
    path2 = os.path.normpath(f'{__file__}/../../tests/resources/test_3887.pdf.ez.pdf')

    with pymupdf.open(path) as document:
        document.subset_fonts(fallback=False)
        document.ez_save(path2)

    with pymupdf.open(path2) as document:
        text = "\u0391\u3001\u0392\u3001\u0393\u3001\u0394\u3001\u0395\u3001\u0396\u3001\u0397\u3001\u0398\u3001\u0399\u3001\u039a\u3001\u039b\u3001\u039c\u3001\u039d\u3001\u039e\u3001\u039f\u3001\u03a0\u3001\u03a1\u3001\u03a3\u3001\u03a4\u3001\u03a5\u3001\u03a6\u3001\u03a7\u3001\u03a8\u3001\u03a9\u3002\u03b1\u3001\u03b2\u3001\u03b3\u3001\u03b4\u3001\u03b5\u3001\u03b6\u3001\u03b7\u3001\u03b8\u3001\u03b9\u3001\u03ba\u3001\u03bb\u3001\u03bc\u3001\u03bd\u3001\u03be\u3001\u03bf\u3001\u03c0\u3001\u03c1\u3001\u03c2\u3001\u03c4\u3001\u03c5\u3001\u03c6\u3001\u03c7\u3001\u03c8\u3001\u03c9\u3002"
        page = document[0]
        # Collect every character of every span, in reading order.
        output = []
        for block in page.get_text("rawdict", flags=0)["blocks"]:
            for line in block["lines"]:
                for span in line["spans"]:
                    output.extend(char["c"] for char in span["chars"])
        print(f'text:\n {text}')
        print(f'output:\n {output}')
        pixmap = page.get_pixmap()
        path_pixmap = f'{path}.0.png'
        pixmap.save(path_pixmap)
        print(f'Have saved to: {path_pixmap=}')
        assert set(output)==set(text)
| 234 | |
| 235 | |
def test_4457():
    """Check that `subset_fonts()` preserves text and rendering.

    For each downloaded input file the test:
    * renders page 0 and extracts its text;
    * saves the document with `ez_save()` before and after `subset_fonts()`;
    * reopens both saved files, extracting text and rendering page 0 again;
    * compares the renderings with the original via `gentle_compare`;
    * writes the three texts to files and (not on Windows) shows diffs.

    The "before" copy must match the original exactly. The "after" copy must
    match exactly for MuPDF >= 1.26.6, otherwise its RMS difference must be
    within 2 of the per-file value given in `files`.
    """
    print()
    # Each entry: (url, local name, size passed to util.download(), RMS
    # difference expected after subsetting with older MuPDF).
    files = (
        ('https://github.com/user-attachments/files/20862923/test_4457_a.pdf', 'test_4457_a.pdf', None, 4),
        ('https://github.com/user-attachments/files/20862922/test_4457_b.pdf', 'test_4457_b.pdf', None, 9),
    )
    for url, name, size, rms_old_after_max in files:
        path = util.download(url, name, size)

        # Render and extract the original, then save copies before and after
        # font subsetting.
        with pymupdf.open(path) as document:
            page = document[0]

            pixmap = document[0].get_pixmap()
            path_pixmap = f'{path}.png'
            pixmap.save(path_pixmap)
            print(f'Have created: {path_pixmap=}')

            text = page.get_text()
            path_before = f'{path}.before.pdf'
            path_after = f'{path}.after.pdf'
            document.ez_save(path_before, garbage=4)
            print(f'Have created {path_before=}')

            document.subset_fonts()
            document.ez_save(path_after, garbage=4)
            print(f'Have created {path_after=}')

        # Text and rendering of the copy saved before subsetting.
        with pymupdf.open(path_before) as document:
            text_before = document[0].get_text()
            pixmap_before = document[0].get_pixmap()
            path_pixmap_before = f'{path_before}.png'
            pixmap_before.save(path_pixmap_before)
            print(f'Have created: {path_pixmap_before=}')

        # Text and rendering of the copy saved after subsetting.
        with pymupdf.open(path_after) as document:
            text_after = document[0].get_text()
            pixmap_after = document[0].get_pixmap()
            path_pixmap_after = f'{path_after}.png'
            pixmap_after.save(path_pixmap_after)
            print(f'Have created: {path_pixmap_after=}')

        import gentle_compare
        rms_before = gentle_compare.pixmaps_rms(pixmap, pixmap_before)
        rms_after = gentle_compare.pixmaps_rms(pixmap, pixmap_after)
        print(f'{rms_before=}')
        print(f'{rms_after=}')

        # Create .png file showing differences between <path> and <path_after>.
        path_pixmap_after_diff = f'{path_after}.diff.png'
        pixmap_after_diff = gentle_compare.pixmaps_diff(pixmap, pixmap_after)
        pixmap_after_diff.save(path_pixmap_after_diff)
        print(f'Have created: {path_pixmap_after_diff}')

        # Extract text from <path>, <path_before> and <path_after> and write to
        # files so we can show differences with `diff`.
        path_text = os.path.normpath(f'{__file__}/../../tests/test_4457.txt')
        path_text_before = f'{path_text}.before.txt'
        path_text_after = f'{path_text}.after.txt'
        with open(path_text, 'w', encoding='utf8') as f:
            f.write(text)
        with open(path_text_before, 'w', encoding='utf8') as f:
            f.write(text_before)
        with open(path_text_after, 'w', encoding='utf8') as f:
            f.write(text_after)

        # Can't write text to stdout on Windows because of encoding errors.
        # NOTE(review): the extent of this guard is inferred; the path prints
        # and the `diff` runs are assumed to belong to it - confirm against
        # the original file.
        if platform.system() != 'Windows':
            print(f'text:\n{textwrap.indent(text, " ")}')
            print(f'text_before:\n{textwrap.indent(text_before, " ")}')
            print(f'text_after:\n{textwrap.indent(text_after, " ")}')
            print(f'{path_text=}')
            print(f'{path_text_before=}')
            print(f'{path_text_after=}')

            # Diagnostic only: the exit status of `diff` is not checked.
            command = f'diff -u {path_text} {path_text_before}'
            print(f'Running: {command}', flush=1)
            subprocess.run(command, shell=1)

            command = f'diff -u {path_text} {path_text_after}'
            print(f'Running: {command}', flush=1)
            subprocess.run(command, shell=1)

        # Saving without subsetting must not change text or rendering.
        assert text_before == text
        assert rms_before == 0

        if pymupdf.mupdf_version_tuple >= (1, 26, 6):
            assert rms_after == 0
        else:
            # As of 2025-05-20 there are some differences in some characters,
            # e.g. the non-ascii characters in `Philipp Krahenbuhl`. See
            # <path_pixmap> and <path_pixmap_after>.
            assert abs(rms_after - rms_old_after_max) < 2

    # Avoid test failure caused by mupdf warnings.
    # NOTE(review): assumed to run once after the loop, on the accumulated
    # warnings - confirm against the original file.
    wt = pymupdf.TOOLS.mupdf_warnings()
    print(f'{wt=}')
    assert wt == 'bogus font ascent/descent values (0 / 0)\n... repeated 5 times...'
