comparison tests/test_memory.py @ 1:1d09e1dec1d9 upstream

ADD: PyMuPDF v1.26.4: the original sdist. It does not yet contain MuPDF. This normally will be downloaded when building PyMuPDF.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:37:51 +0200
parents
children a6bc019ac0b2
comparison
equal deleted inserted replaced
-1:000000000000 1:1d09e1dec1d9
1 import pymupdf
2
3 import gc
4 import os
5 import platform
6 import sys
7
8
def merge_pdf(content: bytes, coverpage: bytes):
    '''
    Insert the pages of the PDF in <content> into the PDF in <coverpage>.

    content:
        A complete PDF file held in memory.
    coverpage:
        A complete PDF file held in memory; this is the document that
        receives the pages and is then serialised.

    Returns the value of Document.write() for the combined document. Both
    documents are closed before returning.
    '''
    with pymupdf.Document(stream=coverpage, filetype='pdf') as merged, \
            pymupdf.Document(stream=content, filetype='pdf') as body:
        merged.insert_pdf(body)
        return merged.write()
15
def test_2791():
    '''
    Check for memory leaks.

    Runs merge_pdf() many times on the same two PDF files, samples the
    process memory after each run, and checks that the ratio of the last
    sample to the third sample stays inside a narrow band. The check is only
    asserted on Linux, with the non-classic implementation, on Python 3.11
    or later; otherwise the reason for skipping it is printed.
    '''
    if os.environ.get('PYMUPDF_RUNNING_ON_VALGRIND') == '1':
        print(f'test_2791(): not running because PYMUPDF_RUNNING_ON_VALGRIND=1.')
        return
    if platform.system().startswith('MSYS_NT-'):
        print(f'test_2791(): not running on msys2 - psutil not available.')
        return

    # How memory is sampled. Set to 'tracemalloc' to measure Python-level
    # allocations instead of the process RSS.
    stat_type = 'psutil'
    if stat_type == 'tracemalloc':
        import tracemalloc
        tracemalloc.start(10)
        def get_stat():
            # get_traced_memory() returns (current, peak); we want current.
            return tracemalloc.get_traced_memory()[0]
    elif stat_type == 'psutil':
        # We use RSS, as used by mprof.
        import psutil
        process = psutil.Process()
        def get_stat():
            return process.memory_info().rss
    else:
        def get_stat():
            return 0

    # Far fewer iterations, with progress output, on GraalVM.
    on_graalvm = platform.python_implementation() == 'GraalVM'
    n = 10 if on_graalvm else 1000
    verbose = on_graalvm

    stats = [1] * n
    for i in range(n):
        if verbose:
            print(f'{i+1}/{n}.', flush=1)
        root = os.path.abspath(f'{__file__}/../../tests/resources')
        with open(f'{root}/test_2791_content.pdf', 'rb') as content_file, \
                open(f'{root}/test_2791_coverpage.pdf', 'rb') as coverpage_file:
            content = content_file.read()
            coverpage = coverpage_file.read()
            merge_pdf(content, coverpage)
            sys.stdout.flush()

        # Sample only after a full collection so that garbage which has not
        # yet been collected is not counted.
        gc.collect()
        stats[i] = get_stat()

    print(f'Memory usage {stat_type=}.')
    for stat in stats:
        sys.stdout.write(f' {stat}')
    sys.stdout.write('\n')

    # Compare the final sample against the third one rather than the first.
    first = stats[2]
    last = stats[-1]
    ratio = last / first
    print(f'{first=} {last=} {ratio=}')

    if platform.system() != 'Linux':
        # Values from psutil indicate larger memory leaks on non-Linux. Don't
        # yet know whether this is because rss is measured differently or a
        # genuine leak is being exposed.
        print(f'test_2791(): not asserting ratio because not running on Linux.')
        return
    if not hasattr(pymupdf, 'mupdf'):
        # Classic implementation has unfixed leaks.
        print(f'test_2791(): not asserting ratio because using classic implementation.')
        return
    if tuple(int(x) for x in platform.python_version_tuple()[:2]) < (3, 11):
        print(f'test_2791(): not asserting ratio because python version less than 3.11: {platform.python_version()=}.')
        return

    if stat_type == 'tracemalloc':
        # With tracemalloc, before the fix to src/extra.i's calls to
        # PyObject_CallMethodObjArgs, ratio was 4.26; after it was 1.40.
        assert 1 < ratio < 1.6
    elif stat_type == 'psutil':
        # Prior to fix, ratio was 1.043. After the fix, improved to 1.005, but
        # varies and sometimes as high as 1.010.
        # 2024-06-03: have seen 0.99919 on musl linux, and sebras reports .025.
        assert 0.990 <= ratio < 1.027, f'{ratio=}'
94
95
def test_4090():
    '''
    Check that repeated page.get_text('rawdict') calls do not grow RSS.

    Opens tests/resources/test_4090.pdf 100 times, extracts 'rawdict' text
    from every page each time, and records the process RSS after each pass.
    The ratio of the final RSS sample to the third one must stay close to 1.

    Skipped on msys2, where psutil is not available (same guard as the other
    memory tests in this file).
    '''
    if platform.system().startswith('MSYS_NT-'):
        print(f'test_4090(): not running on msys2 - psutil not available.')
        return

    print(f'test_4090(): {os.environ.get("PYTHONMALLOC")=}.')
    import psutil
    process = psutil.Process()

    # Every RSS value ever sampled by rss(), in order.
    rsss = list()
    def rss():
        ret = process.memory_info().rss
        rsss.append(ret)
        return ret

    path = os.path.normpath(f'{__file__}/../../tests/resources/test_4090.pdf')
    for i in range(100):
        # Rebinding <d> on every pass drops the previous pass's results, so
        # only one pass worth of extracted text is alive at a time.
        d = {i: dict()}
        with pymupdf.open(path) as document:
            for j, page in enumerate(document):
                d[i][j] = page.get_text('rawdict')
        print(f'test_4090(): {i}: {rss()=}')
    print(f'test_4090(): {rss()=}')
    gc.collect()
    print(f'test_4090(): {rss()=}')

    # Compare the final sample against the third one rather than the first.
    r1 = rsss[2]
    r2 = rsss[-1]
    r = r2 / r1
    # Windows gets a slightly more permissive lower bound.
    lower_bound = 0.93 if platform.system() == 'Windows' else 0.95
    assert lower_bound <= r < 1.05, f'{r1=} {r2=} {r=}.'
124
125
def show_tracemalloc_diff(snapshot1, snapshot2):
    '''
    Print how traced allocations changed between two tracemalloc snapshots.

    snapshot1:
        The earlier snapshot.
    snapshot2:
        The later snapshot; its compare_to() method is called once with
        <snapshot1> and key_type='lineno'.

    Writes to stdout: the totals of `count` and `size` over all entries, the
    first ten entries, then every entry with its count/size and their
    deltas, and finally the totals of the deltas. Returns None.
    '''
    # Both sections of the report use the same comparison, so compute it once.
    snapshot_diff = snapshot2.compare_to(snapshot1, key_type='lineno')

    n = sum(s.count for s in snapshot_diff)
    mem = sum(s.size for s in snapshot_diff)
    print(f'{n=}')
    print(f'{mem=}')
    print("Top 10:")
    for stat in snapshot_diff[:10]:
        print(f' {stat}')

    print('snapshot_diff:')
    count_diff = 0
    size_diff = 0
    for i, s in enumerate(snapshot_diff):
        print(f' {i}: {s.count=} {s.count_diff=} {s.size=} {s.size_diff=} {s.traceback=}')
        count_diff += s.count_diff
        size_diff += s.size_diff
    print(f'{count_diff=} {size_diff=}')
147
148
149
def test_4125():
    '''
    Check that creating and colour-converting image pixmaps does not leak.

    Ten times over, opens tests/resources/test_4125.pdf, builds a Pixmap for
    every image on every page (converting to RGB when the pixmap is in a
    different colourspace), releases caches, and records the process RSS.
    On Linux with Python >= 3.11 the RSS growth after the fourth sample is
    checked; elsewhere the values are only printed.
    '''
    if os.environ.get('PYMUPDF_RUNNING_ON_VALGRIND') == '1':
        print(f'test_4125(): not running because PYMUPDF_RUNNING_ON_VALGRIND=1.')
        return
    if platform.system().startswith('MSYS_NT-'):
        print(f'test_4125(): not running on msys2 - psutil not available.')
        return

    print('')
    print(f'test_4125(): {platform.python_version()=}.')

    path = os.path.normpath(f'{__file__}/../../tests/resources/test_4125.pdf')
    import psutil

    process = psutil.Process()

    # RSS after each iteration, in order.
    rsss = list()

    def get_stat():
        # Record the current RSS and print it together with the change since
        # the first sample and since the previous sample.
        rss = process.memory_info().rss
        drss = rss - rsss[-1] if rsss else 0
        rsss.append(rss)
        print(f'test_4125(): {rss=:,} rss-rss0={rss-rsss[0]:,} drss={drss:,}.')

    for i in range(10):
        with pymupdf.open(path) as document:
            for page in document:
                for image_info in page.get_images(full=True):
                    # Only the xref (first item) is needed here.
                    xref = image_info[0]
                    pixmap = pymupdf.Pixmap(document, xref)
                    if pixmap.colorspace != pymupdf.csRGB:
                        pixmap2 = pymupdf.Pixmap(pymupdf.csRGB, pixmap)
                        del pixmap2
                    del pixmap
        # Release cached data and garbage before sampling, so that only
        # memory which is still held gets counted.
        pymupdf.TOOLS.store_shrink(100)
        pymupdf.TOOLS.glyph_cache_empty()
        gc.collect()
        get_stat()

    if platform.system() != 'Linux':
        # Unfortunately on non-Linux Github test machines the RSS values seem
        # to vary a lot, which causes spurious test failures. So for now at
        # least we don't actually check.
        #
        print(f'Not checking results because non-Linux behaviour is too variable.')
        return

    # Growth is measured from the fourth sample to the last one.
    rss_delta = rsss[-1] - rsss[3]
    print(f'{rss_delta=}')
    pv = tuple(int(x) for x in platform.python_version_tuple()[:2])
    if pv < (3, 11):
        # Python < 3.11 has less reliable memory usage so we exclude.
        print(f'test_4125(): Not checking on {platform.python_version()=} because < 3.11.')
    elif pymupdf.mupdf_version_tuple < (1, 25, 2):
        # MuPDF before 1.25.2 still has the leak, so expect about 4.9MB of
        # growth per iteration (within 15%).
        rss_delta_expected = 4915200 * (len(rsss) - 3)
        assert abs(1 - rss_delta / rss_delta_expected) < 0.15, f'{rss_delta_expected=}'
    else:
        # Before the fix, each iteration would leak 4.9MB.
        rss_delta_max = 100*1000 * (len(rsss) - 3)
        assert rss_delta < rss_delta_max