Mercurial > hgrepos > Python2 > PyMuPDF
diff tests/test_memory.py @ 1:1d09e1dec1d9 upstream
ADD: PyMuPDF v1.26.4: the original sdist.
It does not yet contain MuPDF. This normally will be downloaded when
building PyMuPDF.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:37:51 +0200 |
| parents | |
| children | a6bc019ac0b2 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/test_memory.py Mon Sep 15 11:37:51 2025 +0200 @@ -0,0 +1,228 @@ +import pymupdf + +import gc +import os +import platform +import sys + + +def merge_pdf(content: bytes, coverpage: bytes): + with pymupdf.Document(stream=coverpage, filetype='pdf') as coverpage_pdf: + with pymupdf.Document(stream=content, filetype='pdf') as content_pdf: + coverpage_pdf.insert_pdf(content_pdf) + doc = coverpage_pdf.write() + return doc + +def test_2791(): + ''' + Check for memory leaks. + ''' + if os.environ.get('PYMUPDF_RUNNING_ON_VALGRIND') == '1': + print(f'test_2791(): not running because PYMUPDF_RUNNING_ON_VALGRIND=1.') + return + if platform.system().startswith('MSYS_NT-'): + print(f'test_2791(): not running on msys2 - psutil not available.') + return + #stat_type = 'tracemalloc' + stat_type = 'psutil' + if stat_type == 'tracemalloc': + import tracemalloc + tracemalloc.start(10) + def get_stat(): + current, peak = tracemalloc.get_traced_memory() + return current + elif stat_type == 'psutil': + # We use RSS, as used by mprof. + import psutil + process = psutil.Process() + def get_stat(): + return process.memory_info().rss + else: + def get_stat(): + return 0 + n = 1000 + verbose = False + if platform.python_implementation() == 'GraalVM': + n = 10 + verbose = True + stats = [1] * n + for i in range(n): + if verbose: + print(f'{i+1}/{n}.', flush=1) + root = os.path.abspath(f'{__file__}/../../tests/resources') + with open(f'{root}/test_2791_content.pdf', 'rb') as content_pdf: + with open(f'{root}/test_2791_coverpage.pdf', 'rb') as coverpage_pdf: + content = content_pdf.read() + coverpage = coverpage_pdf.read() + merge_pdf(content, coverpage) + sys.stdout.flush() + + gc.collect() + stats[i] = get_stat() + + print(f'Memory usage {stat_type=}.') + for i, stat in enumerate(stats): + sys.stdout.write(f' {stat}') + #print(f' {i}: {stat}') + sys.stdout.write('\n') + first = stats[2] + last = stats[-1] + ratio = last / first + print(f'{first=} {last=} {ratio=}') + + if platform.system() != 'Linux': + # Values from psutil indicate larger memory leaks on non-Linux. Don't + # yet know whether this is because rss is measured differently or a + # genuine leak is being exposed. + print(f'test_2791(): not asserting ratio because not running on Linux.') + elif not hasattr(pymupdf, 'mupdf'): + # Classic implementation has unfixed leaks. + print(f'test_2791(): not asserting ratio because using classic implementation.') + elif [int(x) for x in platform.python_version_tuple()[:2]] < [3, 11]: + print(f'test_2791(): not asserting ratio because python version less than 3.11: {platform.python_version()=}.') + elif stat_type == 'tracemalloc': + # With tracemalloc Before fix to src/extra.i's calls to + # PyObject_CallMethodObjArgs, ratio was 4.26; after it was 1.40. + assert ratio > 1 and ratio < 1.6 + elif stat_type == 'psutil': + # Prior to fix, ratio was 1.043. After the fix, improved to 1.005, but + # varies and sometimes as high as 1.010. + # 2024-06-03: have seen 0.99919 on musl linux, and sebras reports .025. + assert ratio >= 0.990 and ratio < 1.027, f'{ratio=}' + else: + pass + + +def test_4090(): + print(f'test_4090(): {os.environ.get("PYTHONMALLOC")=}.') + import psutil + process = psutil.Process() + rsss = list() + def rss(): + ret = process.memory_info().rss + rsss.append(ret) + return ret + + path = os.path.normpath(f'{__file__}/../../tests/resources/test_4090.pdf') + for i in range(100): + d = dict() + d[i] = dict() + with pymupdf.open(path) as document: + for j, page in enumerate(document): + d[i][j] = page.get_text('rawdict') + print(f'test_4090(): {i}: {rss()=}') + print(f'test_4090(): {rss()=}') + gc.collect() + print(f'test_4090(): {rss()=}') + r1 = rsss[2] + r2 = rsss[-1] + r = r2 / r1 + if platform.system() == 'Windows': + assert 0.93 <= r < 1.05, f'{r1=} {r2=} {r=}.' + else: + assert 0.95 <= r < 1.05, f'{r1=} {r2=} {r=}.' + + +def show_tracemalloc_diff(snapshot1, snapshot2): + top_stats = snapshot2.compare_to(snapshot1, 'lineno') + n = 0 + mem = 0 + for i in top_stats: + n += i.count + mem += i.size + print(f'{n=}') + print(f'{mem=}') + print("Top 10:") + for stat in top_stats[:10]: + print(f' {stat}') + snapshot_diff = snapshot2.compare_to(snapshot1, key_type='lineno') + print(f'snapshot_diff:') + count_diff = 0 + size_diff = 0 + for i, s in enumerate(snapshot_diff): + print(f' {i}: {s.count=} {s.count_diff=} {s.size=} {s.size_diff=} {s.traceback=}') + count_diff += s.count_diff + size_diff += s.size_diff + print(f'{count_diff=} {size_diff=}') + + + +def test_4125(): + if os.environ.get('PYMUPDF_RUNNING_ON_VALGRIND') == '1': + print(f'test_4125(): not running because PYMUPDF_RUNNING_ON_VALGRIND=1.') + return + if platform.system().startswith('MSYS_NT-'): + print(f'test_4125(): not running on msys2 - psutil not available.') + return + + print('') + print(f'test_4125(): {platform.python_version()=}.') + + path = os.path.normpath(f'{__file__}/../../tests/resources/test_4125.pdf') + import gc + import psutil + + root = os.path.normpath(f'{__file__}/../..') + sys.path.insert(0, root) + try: + import pipcl + finally: + del sys.path[0] + + process = psutil.Process() + + class State: pass + state = State() + state.rsss = list() + state.prev = None + + def get_stat(): + rss = process.memory_info().rss + if not state.rsss: + state.prev = rss + state.rsss.append(rss) + drss = rss - state.prev + state.prev = rss + print(f'test_4125():' + f' {rss=:,}' + f' rss-rss0={rss-state.rsss[0]:,}' + f' drss={drss:,}' + f'.' + ) + + for i in range(10): + with pymupdf.open(path) as document: + for page in document: + for image_info in page.get_images(full=True): + xref, smask, width, height, bpc, colorspace, alt_colorspace, name, filter_, referencer = image_info + pixmap = pymupdf.Pixmap(document, xref) + if pixmap.colorspace != pymupdf.csRGB: + pixmap2 = pymupdf.Pixmap(pymupdf.csRGB, pixmap) + del pixmap2 + del pixmap + pymupdf.TOOLS.store_shrink(100) + pymupdf.TOOLS.glyph_cache_empty() + gc.collect() + get_stat() + + if platform.system() == 'Linux': + rss_delta = state.rsss[-1] - state.rsss[3] + print(f'{rss_delta=}') + pv = platform.python_version_tuple() + pv = (int(pv[0]), int(pv[1])) + if pv < (3, 11): + # Python < 3.11 has less reliable memory usage so we exclude. + print(f'test_4125(): Not checking on {platform.python_version()=} because < 3.11.') + elif pymupdf.mupdf_version_tuple < (1, 25, 2): + rss_delta_expected = 4915200 * (len(state.rsss) - 3) + assert abs(1 - rss_delta / rss_delta_expected) < 0.15, f'{rss_delta_expected=}' + else: + # Before the fix, each iteration would leak 4.9MB. + rss_delta_max = 100*1000 * (len(state.rsss) - 3) + assert rss_delta < rss_delta_max + else: + # Unfortunately on non-Linux Github test machines the RSS values seem + # to vary a lot, which causes spurious test failures. So for at least + # we don't actually check. + # + print(f'Not checking results because non-Linux behaviour is too variable.')
