Mercurial > hgrepos > Python2 > PyMuPDF
comparison tests/test_memory.py @ 1:1d09e1dec1d9 upstream
ADD: PyMuPDF v1.26.4: the original sdist.
It does not yet contain MuPDF, which is normally downloaded when
building PyMuPDF.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:37:51 +0200 |
| parents | |
| children | a6bc019ac0b2 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 1:1d09e1dec1d9 |
|---|---|
| 1 import pymupdf | |
| 2 | |
| 3 import gc | |
| 4 import os | |
| 5 import platform | |
| 6 import sys | |
| 7 | |
| 8 | |
def merge_pdf(content: bytes, coverpage: bytes):
    '''
    Merge two in-memory PDFs and return the serialised result.

    Both byte strings are opened as PDF documents; the `content` document is
    passed to `insert_pdf()` on the `coverpage` document, and the return value
    of the cover page document's `write()` is handed back to the caller.

    Args:
        content: raw bytes of the PDF to be inserted.
        coverpage: raw bytes of the PDF that receives the inserted pages.

    Returns:
        Whatever `Document.write()` returns for the merged document.
    '''
    # Managers are entered left-to-right and exited right-to-left, i.e. the
    # same order as the equivalent nested `with` statements.
    with pymupdf.Document(stream=coverpage, filetype='pdf') as cover_doc, \
            pymupdf.Document(stream=content, filetype='pdf') as body_doc:
        cover_doc.insert_pdf(body_doc)
        return cover_doc.write()
| 15 | |
def test_2791():
    '''
    Check for memory leaks when merging PDFs repeatedly.

    Calls merge_pdf() `n` times on tests/resources/test_2791_content.pdf and
    tests/resources/test_2791_coverpage.pdf, sampling memory usage after a
    gc.collect() at the end of every iteration. The ratio
    stats[-1] / stats[2] is always printed, but only asserted when running on
    Linux, with a pymupdf that has a `mupdf` attribute (i.e. not the classic
    implementation) and with Python >= 3.11.

    The test returns early without checking anything if
    PYMUPDF_RUNNING_ON_VALGRIND=1 or if platform.system() starts with
    'MSYS_NT-'.
    '''
    if os.environ.get('PYMUPDF_RUNNING_ON_VALGRIND') == '1':
        print(f'test_2791(): not running because PYMUPDF_RUNNING_ON_VALGRIND=1.')
        return
    if platform.system().startswith('MSYS_NT-'):
        print(f'test_2791(): not running on msys2 - psutil not available.')
        return
    # Select how memory usage is sampled; get_stat() is defined accordingly.
    #stat_type = 'tracemalloc'
    stat_type = 'psutil'
    if stat_type == 'tracemalloc':
        import tracemalloc
        tracemalloc.start(10)
        def get_stat():
            # Only the current traced size is used; the peak is ignored.
            current, peak = tracemalloc.get_traced_memory()
            return current
    elif stat_type == 'psutil':
        # We use RSS, as used by mprof.
        import psutil
        process = psutil.Process()
        def get_stat():
            return process.memory_info().rss
    else:
        # Unknown stat_type: sampling is effectively disabled.
        def get_stat():
            return 0
    n = 1000
    verbose = False
    if platform.python_implementation() == 'GraalVM':
        # Far fewer iterations, with per-iteration progress output.
        n = 10
        verbose = True
    # One slot per iteration; every slot is overwritten in the loop below.
    stats = [1] * n
    for i in range(n):
        if verbose:
            print(f'{i+1}/{n}.', flush=1)
        # The input files are re-read from disk on every iteration.
        root = os.path.abspath(f'{__file__}/../../tests/resources')
        with open(f'{root}/test_2791_content.pdf', 'rb') as content_pdf:
            with open(f'{root}/test_2791_coverpage.pdf', 'rb') as coverpage_pdf:
                content = content_pdf.read()
                coverpage = coverpage_pdf.read()
                # The merged result is deliberately discarded.
                merge_pdf(content, coverpage)
                sys.stdout.flush()

        # Collect garbage before sampling so that unreachable Python objects
        # are not counted.
        gc.collect()
        stats[i] = get_stat()

    print(f'Memory usage {stat_type=}.')
    for i, stat in enumerate(stats):
        sys.stdout.write(f' {stat}')
        #print(f' {i}: {stat}')
    sys.stdout.write('\n')
    # Baseline is the third sample rather than the first - presumably so that
    # one-off start-up allocations are excluded (TODO confirm).
    first = stats[2]
    last = stats[-1]
    ratio = last / first
    print(f'{first=} {last=} {ratio=}')

    if platform.system() != 'Linux':
        # Values from psutil indicate larger memory leaks on non-Linux. Don't
        # yet know whether this is because rss is measured differently or a
        # genuine leak is being exposed.
        print(f'test_2791(): not asserting ratio because not running on Linux.')
    elif not hasattr(pymupdf, 'mupdf'):
        # Classic implementation has unfixed leaks.
        print(f'test_2791(): not asserting ratio because using classic implementation.')
    elif [int(x) for x in platform.python_version_tuple()[:2]] < [3, 11]:
        print(f'test_2791(): not asserting ratio because python version less than 3.11: {platform.python_version()=}.')
    elif stat_type == 'tracemalloc':
        # With tracemalloc Before fix to src/extra.i's calls to
        # PyObject_CallMethodObjArgs, ratio was 4.26; after it was 1.40.
        assert ratio > 1 and ratio < 1.6
    elif stat_type == 'psutil':
        # Prior to fix, ratio was 1.043. After the fix, improved to 1.005, but
        # varies and sometimes as high as 1.010.
        # 2024-06-03: have seen 0.99919 on musl linux, and sebras reports .025.
        assert ratio >= 0.990 and ratio < 1.027, f'{ratio=}'
    else:
        pass
| 94 | |
| 95 | |
def test_4090():
    '''
    Check that repeated 'rawdict' text extraction does not grow RSS.

    Opens tests/resources/test_4090.pdf 100 times, extracting
    `page.get_text('rawdict')` for every page, and records the process RSS
    after each iteration, once more after the loop and once more after
    gc.collect(). Asserts that the last recorded RSS divided by the third
    recorded RSS lies in [0.95, 1.05) (or [0.93, 1.05) on Windows).
    '''
    print(f'test_4090(): {os.environ.get("PYTHONMALLOC")=}.')
    import psutil
    process = psutil.Process()
    # Every RSS value returned by rss(), in call order.
    rsss = list()
    def rss():
        # Sample the current RSS, record it in `rsss`, and return it.
        ret = process.memory_info().rss
        rsss.append(ret)
        return ret

    path = os.path.normpath(f'{__file__}/../../tests/resources/test_4090.pdf')
    for i in range(100):
        # `d` is rebound on every iteration, so it only ever holds the
        # extraction results of the current iteration.
        d = dict()
        d[i] = dict()
        with pymupdf.open(path) as document:
            for j, page in enumerate(document):
                d[i][j] = page.get_text('rawdict')
        print(f'test_4090(): {i}: {rss()=}')
    print(f'test_4090(): {rss()=}')
    gc.collect()
    print(f'test_4090(): {rss()=}')
    # Compare the final sample (taken after gc.collect()) with the third
    # sample - presumably to skip start-up allocations (TODO confirm).
    r1 = rsss[2]
    r2 = rsss[-1]
    r = r2 / r1
    if platform.system() == 'Windows':
        # Windows is allowed a slightly lower bound.
        assert 0.93 <= r < 1.05, f'{r1=} {r2=} {r=}.'
    else:
        assert 0.95 <= r < 1.05, f'{r1=} {r2=} {r=}.'
| 124 | |
| 125 | |
def show_tracemalloc_diff(snapshot1, snapshot2):
    '''
    Print how traced allocations changed between two tracemalloc snapshots.

    The snapshots are compared per source line (`key_type='lineno'`). The
    output consists of:

    * the total block count (`n`) and total size (`mem`) over all entries,
    * the first ten entries of the comparison,
    * every entry with its count/size and their differences, and
    * the summed count and size differences.

    Args:
        snapshot1: the earlier `tracemalloc.Snapshot`.
        snapshot2: the later `tracemalloc.Snapshot`.

    Returns:
        None; everything is written to stdout.
    '''
    # The comparison depends only on the two snapshots, so compute it once
    # and reuse it for both the summary and the detailed listing.
    snapshot_diff = snapshot2.compare_to(snapshot1, 'lineno')

    n = sum(entry.count for entry in snapshot_diff)
    mem = sum(entry.size for entry in snapshot_diff)
    print(f'{n=}')
    print(f'{mem=}')

    print("Top 10:")
    for stat in snapshot_diff[:10]:
        print(f' {stat}')

    print('snapshot_diff:')
    count_diff = 0
    size_diff = 0
    for i, s in enumerate(snapshot_diff):
        # The name `s` is part of the output because of the `=` specifier.
        print(f' {i}: {s.count=} {s.count_diff=} {s.size=} {s.size_diff=} {s.traceback=}')
        count_diff += s.count_diff
        size_diff += s.size_diff
    print(f'{count_diff=} {size_diff=}')
| 147 | |
| 148 | |
| 149 | |
def test_4125():
    '''
    Check for a memory leak when creating and converting image pixmaps.

    For 10 iterations, opens tests/resources/test_4125.pdf, creates a Pixmap
    for every image on every page (converting it to RGB when its colorspace
    is not pymupdf.csRGB), then shrinks the store, empties the glyph cache,
    runs gc.collect() and records the process RSS.

    Results are only checked on Linux with Python >= 3.11. With MuPDF older
    than 1.25.2 the RSS growth is expected to be within 15% of 4915200 bytes
    per sample; otherwise the growth must stay below 100,000 bytes per
    sample.

    The test returns early without checking anything if
    PYMUPDF_RUNNING_ON_VALGRIND=1 or if platform.system() starts with
    'MSYS_NT-'.
    '''
    if os.environ.get('PYMUPDF_RUNNING_ON_VALGRIND') == '1':
        print(f'test_4125(): not running because PYMUPDF_RUNNING_ON_VALGRIND=1.')
        return
    if platform.system().startswith('MSYS_NT-'):
        print(f'test_4125(): not running on msys2 - psutil not available.')
        return

    print('')
    print(f'test_4125(): {platform.python_version()=}.')

    path = os.path.normpath(f'{__file__}/../../tests/resources/test_4125.pdf')
    # NOTE(review): gc is already imported at module level; this local import
    # is redundant.
    import gc
    import psutil

    # Temporarily put the directory two levels above this file at the front
    # of sys.path so that `pipcl` can be imported from there.
    # NOTE(review): pipcl is not referenced again within this function.
    root = os.path.normpath(f'{__file__}/../..')
    sys.path.insert(0, root)
    try:
        import pipcl
    finally:
        del sys.path[0]

    process = psutil.Process()

    # Simple mutable namespace shared with get_stat() below.
    class State: pass
    state = State()
    state.rsss = list()     # All RSS samples, in order.
    state.prev = None       # Previous RSS sample, for the per-call delta.

    def get_stat():
        # Record the current RSS and print it, together with the change
        # since the first sample and since the previous sample.
        rss = process.memory_info().rss
        if not state.rsss:
            # First sample: make the delta to the previous sample zero.
            state.prev = rss
        state.rsss.append(rss)
        drss = rss - state.prev
        state.prev = rss
        print(f'test_4125():'
                f' {rss=:,}'
                f' rss-rss0={rss-state.rsss[0]:,}'
                f' drss={drss:,}'
                f'.'
                )

    for i in range(10):
        with pymupdf.open(path) as document:
            for page in document:
                for image_info in page.get_images(full=True):
                    xref, smask, width, height, bpc, colorspace, alt_colorspace, name, filter_, referencer = image_info
                    pixmap = pymupdf.Pixmap(document, xref)
                    if pixmap.colorspace != pymupdf.csRGB:
                        pixmap2 = pymupdf.Pixmap(pymupdf.csRGB, pixmap)
                        del pixmap2
                    del pixmap
        # Release cached data and garbage before sampling RSS.
        pymupdf.TOOLS.store_shrink(100)
        pymupdf.TOOLS.glyph_cache_empty()
        gc.collect()
        get_stat()

    if platform.system() == 'Linux':
        # Growth is measured from the fourth sample to the last one.
        rss_delta = state.rsss[-1] - state.rsss[3]
        print(f'{rss_delta=}')
        pv = platform.python_version_tuple()
        pv = (int(pv[0]), int(pv[1]))
        if pv < (3, 11):
            # Python < 3.11 has less reliable memory usage so we exclude.
            print(f'test_4125(): Not checking on {platform.python_version()=} because < 3.11.')
        elif pymupdf.mupdf_version_tuple < (1, 25, 2):
            # Older MuPDF: assert that the known leak is still present at
            # roughly the expected size.
            rss_delta_expected = 4915200 * (len(state.rsss) - 3)
            assert abs(1 - rss_delta / rss_delta_expected) < 0.15, f'{rss_delta_expected=}'
        else:
            # Before the fix, each iteration would leak 4.9MB.
            rss_delta_max = 100*1000 * (len(state.rsss) - 3)
            assert rss_delta < rss_delta_max
    else:
        # Unfortunately on non-Linux Github test machines the RSS values seem
        # to vary a lot, which causes spurious test failures. So for now at
        # least we don't actually check.
        #
        print(f'Not checking results because non-Linux behaviour is too variable.')
