comparison mupdf-source/scripts/cmapshare.py @ 3:2c135c81b16c

MERGE: upstream PyMuPDF 1.26.4 with MuPDF 1.26.7
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:44:09 +0200
parents b50eed0cc0ef
children
comparison
equal deleted inserted replaced
0:6015a75abc2d 3:2c135c81b16c
1 #!/usr/bin/env python3
2
3 # Find and extract common CMap subsets.
4 # Takes flattened CMaps as input, using only the 'cidchar' sections.
5 # The outputs are truncated; so use 'cmapflatten.py' to clean them up.
6
7 import sys, os
8
def load_cmap_set(filename):
    """Return the set of 'cidchar' entries found in a flattened CMap file.

    Every line is stripped of surrounding whitespace. Lines located between
    a line ending in "begincidchar" and the next line ending in "endcidchar"
    are collected; the two marker lines themselves are never included.
    Several such sections in one file are merged into the same set.

    Args:
        filename: Path of the CMap file to read.

    Returns:
        A set of stripped entry lines (empty if the file has no section).
    """
    cmap = set()
    active = False
    # The context manager closes the file promptly, even if reading fails.
    with open(filename) as stream:
        for line in stream:
            line = line.strip()
            # Check the end marker first so it is not stored as an entry.
            if line.endswith("endcidchar"):
                active = False
            if active:
                cmap.add(line)
            # Check the start marker last so it is not stored as an entry.
            if line.endswith("begincidchar"):
                active = True
    return cmap
18
def load_cmap_prologue(filename):
    """Return the lines that precede the first 'cidchar' section of a CMap.

    Lines are stripped of surrounding whitespace and collected in file order
    until the first line ending in "begincidchar" is met; that marker line is
    not part of the result. If the file has no such line, every line is
    returned.

    Args:
        filename: Path of the CMap file to read.

    Returns:
        A list of stripped prologue lines.
    """
    prologue = []
    # The context manager closes the file promptly, even on the early break.
    with open(filename) as stream:
        for line in stream:
            line = line.strip()
            if line.endswith("begincidchar"):
                break
            prologue.append(line)
    return prologue
27
# Lines written by print_cmap after the 'cidchar' entries of every output.
epilogue = [
    'endcidchar',
]

# sys.argv[1] is the output file for the common subset and sys.argv[2:] are
# the input CMaps. Fail with a usage message instead of a bare IndexError
# when either of them is missing.
if len(sys.argv) < 3:
    sys.exit(
        "usage: %s COMMON-OUTPUT INPUT-CMAP [INPUT-CMAP ...]"
        % os.path.basename(sys.argv[0])
    )

# The base name of the output file is used as the name of the common CMap.
common_name = os.path.basename(sys.argv[1])

# First find the common subset: the entries present in every input CMap.
common = load_cmap_set(sys.argv[2])
for f in sys.argv[3:]:
    common &= load_cmap_set(f)
38
def print_cmap(filename, prologue, cmap):
    """Write a CMap file: filtered prologue, one 'cidchar' section, epilogue.

    Prologue lines ending in "usecmap" are dropped. Right after a line that
    is exactly 'begincmap', a "/<common_name> usecmap" line is inserted,
    where common_name is the module-level name of the common CMap. The
    entries are then written in sorted order, preceded by a
    "<count> begincidchar" line and followed by the module-level epilogue.

    Args:
        filename: Path of the file to create or overwrite.
        prologue: Iterable of lines to emit before the 'cidchar' section.
        cmap: Collection of entry lines for the 'cidchar' section.
    """
    # The context manager flushes and closes the output deterministically
    # instead of leaving that to garbage collection.
    with open(filename, "w") as out:
        for line in prologue:
            if not line.endswith("usecmap"):
                print(line, file=out)
            if line == 'begincmap':
                print("/"+common_name, "usecmap", file=out)
        print(len(cmap), "begincidchar", file=out)
        for line in sorted(cmap):
            print(line, file=out)
        for line in epilogue:
            print(line, file=out)
51
# Write the shared subset to the file named by the first argument.
print_cmap(sys.argv[1], ["/CMapName /" + common_name], common)

# For each input, write what is left once the shared subset is removed,
# keeping that input's own prologue, to "<input>.shared".
for source in sys.argv[2:]:
    leftover = load_cmap_set(source).difference(common)
    print_cmap("%s.shared" % source, load_cmap_prologue(source), leftover)