Mercurial > hgrepos > Python2 > PyMuPDF
view tests/test_memory.py @ 29:f76e6575dca9 v1.26.4+1
+++++ v1.26.4+1
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Fri, 19 Sep 2025 19:59:23 +0200 |
| parents | 1d09e1dec1d9 |
| children | a6bc019ac0b2 |
line wrap: on
line source
import pymupdf

import gc
import os
import platform
import sys


def merge_pdf(content: bytes, coverpage: bytes):
    '''
    Open <coverpage> and <content> as in-memory PDF documents, insert the
    content document into the cover page document, and return what the cover
    page document's write() produces.
    '''
    with pymupdf.Document(stream=coverpage, filetype='pdf') as cover_doc, \
            pymupdf.Document(stream=content, filetype='pdf') as body_doc:
        cover_doc.insert_pdf(body_doc)
        return cover_doc.write()


def test_2791():
    '''
    Check for memory leaks.

    Calls merge_pdf() many times on the same two resource files, samples a
    memory statistic after each call, and (on configurations where the
    figures are trusted) asserts that the last sample is close to the third.
    '''
    if os.environ.get('PYMUPDF_RUNNING_ON_VALGRIND') == '1':
        print(f'test_2791(): not running because PYMUPDF_RUNNING_ON_VALGRIND=1.')
        return
    if platform.system().startswith('MSYS_NT-'):
        print(f'test_2791(): not running on msys2 - psutil not available.')
        return

    # Selects the measurement below; change to 'tracemalloc' to measure with
    # tracemalloc instead of psutil.
    stat_type = 'psutil'

    if stat_type == 'tracemalloc':
        import tracemalloc
        tracemalloc.start(10)

        def get_stat():
            # get_traced_memory() returns (current, peak); we want current.
            return tracemalloc.get_traced_memory()[0]
    elif stat_type == 'psutil':
        # We use RSS, as used by mprof.
        import psutil
        process = psutil.Process()

        def get_stat():
            return process.memory_info().rss
    else:
        def get_stat():
            return 0

    # GraalVM gets far fewer iterations, with per-iteration progress output.
    on_graalvm = platform.python_implementation() == 'GraalVM'
    n = 10 if on_graalvm else 1000
    verbose = on_graalvm

    stats = [1] * n
    for i in range(n):
        if verbose:
            print(f'{i+1}/{n}.', flush=1)
        root = os.path.abspath(f'{__file__}/../../tests/resources')
        content_path = f'{root}/test_2791_content.pdf'
        coverpage_path = f'{root}/test_2791_coverpage.pdf'
        with open(content_path, 'rb') as content_file, \
                open(coverpage_path, 'rb') as coverpage_file:
            content = content_file.read()
            coverpage = coverpage_file.read()
            merge_pdf(content, coverpage)
            sys.stdout.flush()
        # Collect garbage before sampling so that only memory which is still
        # held contributes to the statistic.
        gc.collect()
        stats[i] = get_stat()

    print(f'Memory usage {stat_type=}.')
    sys.stdout.write(''.join(f' {stat}' for stat in stats))
    sys.stdout.write('\n')

    # The third sample is the baseline that the final sample is compared to.
    first = stats[2]
    last = stats[-1]
    ratio = last / first
    print(f'{first=} {last=} {ratio=}')

    if platform.system() != 'Linux':
        # Values from psutil indicate larger memory leaks on non-Linux. Don't
        # yet know whether this is because rss is measured differently or a
        # genuine leak is being exposed.
        print(f'test_2791(): not asserting ratio because not running on Linux.')
        return
    if not hasattr(pymupdf, 'mupdf'):
        # Classic implementation has unfixed leaks.
        print(f'test_2791(): not asserting ratio because using classic implementation.')
        return
    if tuple(int(x) for x in platform.python_version_tuple()[:2]) < (3, 11):
        print(f'test_2791(): not asserting ratio because python version less than 3.11: {platform.python_version()=}.')
        return

    if stat_type == 'tracemalloc':
        # With tracemalloc, before the fix to src/extra.i's calls to
        # PyObject_CallMethodObjArgs the ratio was 4.26; after it was 1.40.
        assert 1 < ratio < 1.6
    elif stat_type == 'psutil':
        # Prior to the fix the ratio was 1.043. After the fix it improved to
        # 1.005, but varies and is sometimes as high as 1.010.
        # 2024-06-03: have seen 0.99919 on musl linux, and sebras reports .025.
        assert 0.990 <= ratio < 1.027, f'{ratio=}'


def test_4090():
    '''
    Extract 'rawdict' text from every page of test_4090.pdf one hundred times
    and assert that the final RSS sample stays close to the third one.
    '''
    print(f'test_4090(): {os.environ.get("PYTHONMALLOC")=}.')
    import psutil
    process = psutil.Process()
    samples = []

    def rss():
        # Every reading is also recorded, so printing a reading adds a sample.
        current = process.memory_info().rss
        samples.append(current)
        return current

    path = os.path.normpath(f'{__file__}/../../tests/resources/test_4090.pdf')
    for i in range(100):
        # A new container each iteration; the previous iteration's extracted
        # text is dropped when the name is rebound.
        extracted = {i: {}}
        with pymupdf.open(path) as document:
            for j, page in enumerate(document):
                extracted[i][j] = page.get_text('rawdict')
        print(f'test_4090(): {i}: {rss()=}')
    print(f'test_4090(): {rss()=}')
    gc.collect()
    print(f'test_4090(): {rss()=}')

    r1 = samples[2]
    r2 = samples[-1]
    r = r2 / r1
    # Windows is allowed a slightly lower ratio than other systems.
    lowest = 0.93 if platform.system() == 'Windows' else 0.95
    assert lowest <= r < 1.05, f'{r1=} {r2=} {r=}.'
def show_tracemalloc_diff(snapshot1, snapshot2):
    '''
    Print how tracemalloc snapshot <snapshot2> differs from <snapshot1>,
    grouped by source line.

    Prints the total count and size over all entries, the first ten entries,
    every entry with its count, size and their differences, and finally the
    summed count and size differences. Returns None.
    '''
    # One comparison serves both the "Top 10" and the full listing.
    snapshot_diff = snapshot2.compare_to(snapshot1, key_type='lineno')

    n = sum(s.count for s in snapshot_diff)
    mem = sum(s.size for s in snapshot_diff)
    print(f'{n=}')
    print(f'{mem=}')

    print("Top 10:")
    for stat in snapshot_diff[:10]:
        print(f' {stat}')

    print(f'snapshot_diff:')
    count_diff = 0
    size_diff = 0
    for i, s in enumerate(snapshot_diff):
        print(f' {i}: {s.count=} {s.count_diff=} {s.size=} {s.size_diff=} {s.traceback=}')
        count_diff += s.count_diff
        size_diff += s.size_diff
    print(f'{count_diff=} {size_diff=}')


def test_4125():
    '''
    Create a pixmap for every image in test_4125.pdf (converting to RGB when
    the pixmap is in another colorspace) ten times over, recording RSS after
    each pass, and on Linux assert on how much RSS grew.
    '''
    if os.environ.get('PYMUPDF_RUNNING_ON_VALGRIND') == '1':
        print(f'test_4125(): not running because PYMUPDF_RUNNING_ON_VALGRIND=1.')
        return
    if platform.system().startswith('MSYS_NT-'):
        print(f'test_4125(): not running on msys2 - psutil not available.')
        return

    print('')
    print(f'test_4125(): {platform.python_version()=}.')

    path = os.path.normpath(f'{__file__}/../../tests/resources/test_4125.pdf')
    import psutil

    # pipcl is imported from the directory two levels above this file; the
    # temporary sys.path entry is removed again whether or not the import
    # succeeds.
    # NOTE(review): nothing below uses pipcl - confirm whether this import is
    # still required.
    root = os.path.normpath(f'{__file__}/../..')
    sys.path.insert(0, root)
    try:
        import pipcl
    finally:
        del sys.path[0]

    process = psutil.Process()
    # RSS samples, one per call of get_stat().
    rsss = []

    def get_stat():
        '''
        Record the current RSS and print it together with its change since
        the first sample and since the previous sample.
        '''
        rss = process.memory_info().rss
        # The first sample has no predecessor, so its delta is reported as 0.
        drss = (rss - rsss[-1]) if rsss else 0
        rsss.append(rss)
        print(f'test_4125():'
                f' {rss=:,}'
                f' rss-rss0={rss-rsss[0]:,}'
                f' drss={drss:,}'
                f'.'
                )

    for _ in range(10):
        with pymupdf.open(path) as document:
            for page in document:
                for image_info in page.get_images(full=True):
                    # Only the xref, the first item of each tuple, is needed.
                    xref = image_info[0]
                    pixmap = pymupdf.Pixmap(document, xref)
                    if pixmap.colorspace != pymupdf.csRGB:
                        pixmap2 = pymupdf.Pixmap(pymupdf.csRGB, pixmap)
                        del pixmap2
                    del pixmap
        # Empty PyMuPDF's caches and collect garbage before sampling.
        pymupdf.TOOLS.store_shrink(100)
        pymupdf.TOOLS.glyph_cache_empty()
        gc.collect()
        get_stat()

    if platform.system() == 'Linux':
        # Growth is measured from the fourth sample to the last one.
        # NOTE(review): that interval spans len(rsss) - 4 passes, while the
        # limits below are scaled by len(rsss) - 3. Left as is because the
        # tolerances look empirically tuned - confirm before changing.
        rss_delta = rsss[-1] - rsss[3]
        print(f'{rss_delta=}')
        pv = platform.python_version_tuple()
        pv = (int(pv[0]), int(pv[1]))
        if pv < (3, 11):
            # Python < 3.11 has less reliable memory usage so we exclude.
            print(f'test_4125(): Not checking on {platform.python_version()=} because < 3.11.')
        elif pymupdf.mupdf_version_tuple < (1, 25, 2):
            # For these MuPDF versions the growth is expected to be within
            # 15% of 4915200 bytes per pass (presumably they lack the fix).
            rss_delta_expected = 4915200 * (len(rsss) - 3)
            assert abs(1 - rss_delta / rss_delta_expected) < 0.15, f'{rss_delta_expected=}'
        else:
            # Before the fix, each iteration would leak 4.9MB.
            rss_delta_max = 100*1000 * (len(rsss) - 3)
            assert rss_delta < rss_delta_max, f'{rss_delta=} {rss_delta_max=}'
    else:
        # Unfortunately on non-Linux Github test machines the RSS values seem
        # to vary a lot, which causes spurious test failures. So for now at
        # least we don't actually check.
        print(f'Not checking results because non-Linux behaviour is too variable.')
