comparison tests/gentle_compare.py @ 3:2c135c81b16c

MERGE: upstream PyMuPDF 1.26.4 with MuPDF 1.26.7
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:44:09 +0200
parents 1d09e1dec1d9
children a6bc019ac0b2
comparison
equal deleted inserted replaced
0:6015a75abc2d 3:2c135c81b16c
1 import math
2
3 import pymupdf
4
5
def gentle_compare(w0, w1, tolerance=1e-3):
    """Check lists of "words" extractions for approximate equality.

    * both lists must have same length
    * word items must contain same word strings
    * word rectangles must be approximately equal

    Args:
        w0: first list of word items. Each item is read as a sequence whose
            first four entries form the word rectangle and whose entry at
            index 4 is the word string.
        w1: second list of word items, same layout as w0.
        tolerance: maximum (Euclidean) norm of the difference rectangle for
            two word rectangles to still count as equal.

    Returns:
        True if the lists match. Otherwise False, after printing the first
        discrepancy that was found.
    """
    word_count = len(w0)  # number of words
    if word_count != len(w1):
        print(f"different number of words: {word_count}/{len(w1)}")
        return False
    # Lengths are equal here, so zip() visits every word pair.
    for i, (word0, word1) in enumerate(zip(w0, w1)):
        if word0[4] != word1[4]:  # word strings must be the same
            print(f"word {i} mismatch")
            return False
        r0 = pymupdf.Rect(word0[:4])  # rect of first word
        r1 = pymupdf.Rect(word1[:4])  # rect of second word
        delta = (r1 - r0).norm()  # norm of difference rectangle
        if delta > tolerance:
            print(f"word {i}: rectangle mismatch {delta}")
            return False
    return True
29
30
def rms(a, b, verbose=None, out_prefix=''):
    '''
    Returns RMS diff of raw bytes of two sequences.

    <a> and <b> must have the same length (asserted) and yield numbers when
    iterated. For two empty sequences the result is 0.0.

    <verbose> is falsy for no output, True to print progress every 100000
    items, or an integer N to print progress every N items.

    <out_prefix> is prepended to each progress line.
    '''
    if verbose is True:
        verbose = 100000
    assert len(a) == len(b)
    if len(a) == 0:
        # Nothing to compare; avoid dividing by zero below.
        return 0.0
    e = 0  # running sum of squared differences
    for i, (aa, bb) in enumerate(zip(a, b)):
        if verbose and (i % verbose == 0):
            print(f'{out_prefix}rms(): {i=} {e=} {aa=} {bb=}.')
        e += (aa - bb) ** 2
    return math.sqrt(e / len(a))
45
46
def pixmaps_rms(a, b, out_prefix=''):
    '''
    Computes the RMS difference between the raw sample bytes of two pixmaps.

    <a> and <b> can each be a pymupdf.Pixmap or path of a bitmap file; paths
    are loaded into pixmaps first.

    Both pixmaps must have the same rectangle and the same number of sample
    bytes; this is asserted.

    The result is printed (prefixed with <out_prefix>) and returned.
    '''
    # Paths are turned into pixmaps, <a> first and then <b>.
    if isinstance(a, str):
        print(f'{out_prefix}pixmaps_rms(): reading pixmap from {a=}.')
        a = pymupdf.Pixmap(a)
    if isinstance(b, str):
        print(f'{out_prefix}pixmaps_rms(): reading pixmap from {b=}.')
        b = pymupdf.Pixmap(b)

    assert a.irect == b.irect, f'Differing rects: {a.irect=} {b.irect=}.'

    samples_a, samples_b = a.samples_mv, b.samples_mv
    assert len(samples_a) == len(samples_b)

    ret = rms(samples_a, samples_b, verbose=True, out_prefix=out_prefix)
    print(f'{out_prefix}pixmaps_rms(): {ret=}.')
    return ret
68
69
def pixmaps_diff(a, b, out_prefix=''):
    '''
    Returns a pymupdf.Pixmap that represents the difference between pixmaps <a>
    and <b>.

    Each byte in the returned pixmap is `128 + (b_byte - a_byte) // 2`.

    <a> and <b> can each be a pymupdf.Pixmap or path of a bitmap file. We
    assert that both pixmaps have the same rectangle and the same number of
    sample bytes.

    <out_prefix> is prepended to the 'reading pixmap' messages.
    '''
    if isinstance(a, str):
        print(f'{out_prefix}pixmaps_diff(): reading pixmap from {a=}.')
        a = pymupdf.Pixmap(a)
    if isinstance(b, str):
        print(f'{out_prefix}pixmaps_diff(): reading pixmap from {b=}.')
        b = pymupdf.Pixmap(b)
    assert a.irect == b.irect, f'Differing rects: {a.irect=} {b.irect=}.'
    a_mv = a.samples_mv
    b_mv = b.samples_mv
    # Build the output pixmap from <a>'s serialized bytes; every sample byte
    # is overwritten in the loop below.
    c = pymupdf.Pixmap(a.tobytes())
    c_mv = c.samples_mv
    assert len(a_mv) == len(b_mv) == len(c_mv)
    print(f'{len(a_mv)=}')
    for i, (a_byte, b_byte) in enumerate(zip(a_mv, b_mv)):
        assert 0 <= a_byte < 256
        assert 0 <= b_byte < 256
        # Set byte to 128 plus half the diff so we represent the full
        # -255..+255 range.
        c_mv[i] = 128 + (b_byte - a_byte) // 2
    return c
89 if 1:
90 print(f'{len(a_mv)=}')
91 for i, (a_byte, b_byte, c_byte) in enumerate(zip(a_mv, b_mv, c_mv)):
92 assert 0 <= a_byte < 256
93 assert 0 <= b_byte < 256
94 assert 0 <= c_byte < 256
95 # Set byte to 128 plus half the diff so we represent the full
96 # -255..+255 range.
97 c_mv[i] = 128 + (b_byte - a_byte) // 2
98 return c