comparison mupdf-source/scripts/cmapflatten.py @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 #!/usr/bin/env python3
2
3 # Parse a Uni* CMap file and flatten it.
4 #
5 # The Uni* CMap files only have 'cidchar' and 'cidrange' sections, never
6 # 'bfchar' or 'bfrange'.
7
8 import sys
9
def flattencmap(filename):
    """Parse the Uni* CMap file `filename` and print a flattened copy to stdout.

    The input is scanned line by line:

    * Lines whose very first character is '%' are skipped, as are blank lines.
    * /CMapVersion, /CMapName, /WMode, /Registry, /Ordering and /Supplement
      lines override the defaults set at the top of this function.
    * A '<name> usecmap' line is remembered and re-emitted as is; the
      referenced CMap is not loaded or merged.
    * Entries inside begincodespacerange / begincidrange / begincidchar
      sections are collected until a line starting with "end" is seen.

    "Flattening" means every cidrange is expanded into one mapping per code,
    and all mappings are written as a single begincidchar section sorted by
    code.  If a code is mapped more than once, the mapping read last wins.

    Raises whatever open() raises if the file cannot be read, ValueError if a
    code token is not a valid number, and IndexError if one of the header
    keys listed above appears without a value.
    """
    codespacerange = []
    usecmap = ""
    cmapname = ""
    cmapversion = "1.0"
    csi_registry = "(Adobe)"
    csi_ordering = "(Unknown)"
    csi_supplement = 1
    wmode = 0

    # code -> CID.  Deliberately not called "map", which would shadow the
    # builtin of the same name.
    cidmap = {}

    def tocode(s):
        # '<....>' tokens are hexadecimal, anything else is decimal.
        if s[0] == '<' and s[-1] == '>':
            return int(s[1:-1], 16)
        return int(s, 10)

    def map_cidchar(lo, v):
        cidmap[lo] = v

    def map_cidrange(lo, hi, v):
        # Codes lo..hi (inclusive) map to consecutive values starting at v.
        # An inverted range (hi < lo) maps nothing.
        for offset in range(hi - lo + 1):
            cidmap[lo + offset] = v + offset

    current = None
    # The context manager closes the file even if parsing raises; iterating
    # the file object avoids loading the whole file with readlines().
    with open(filename, "r") as infile:
        for raw in infile:
            # The comment test looks at the unstripped line on purpose: only
            # a '%' in the first column marks a comment.
            if raw[0] == '%':
                continue
            line = raw.split()
            if not line:
                continue
            key = line[0]
            operator = line[1] if len(line) > 1 else None
            if key == '/CMapVersion':
                cmapversion = line[1]
            elif key == '/CMapName':
                # Drop the leading '/' of the name token.
                cmapname = line[1][1:]
            elif key == '/WMode':
                wmode = int(line[1])
            elif key == '/Registry':
                csi_registry = line[1]
            elif key == '/Ordering':
                csi_ordering = line[1]
            elif key == '/Supplement':
                csi_supplement = line[1]
            elif operator == 'usecmap':
                usecmap = key[1:]
            elif operator == 'begincodespacerange':
                current = 'codespacerange'
            elif operator == 'begincidrange':
                current = 'cidrange'
            elif operator == 'begincidchar':
                current = 'cidchar'
            elif key.startswith("end"):
                current = None
            elif current == 'codespacerange' and len(line) == 2:
                # Remember how many hex digits the range was written with so
                # it can be printed back at the same width.  Kept as an int:
                # dividing by 2 and multiplying again would go through a
                # float under Python 3.
                width = len(key) - 2
                codespacerange.append((width, tocode(key), tocode(line[1])))
            elif current == 'cidrange' and len(line) == 3:
                map_cidrange(tocode(key), tocode(line[1]), tocode(line[2]))
            elif current == 'cidchar' and len(line) == 2:
                map_cidchar(tocode(key), tocode(line[1]))

    # Print flattened CMap file

    # Strings printed without the % operator keep their "%%" literally, which
    # is the DSC comment prefix; strings that are %-formatted need "%%%%".
    print("%!PS-Adobe-3.0 Resource-CMap")
    print("%%DocumentNeededResources: procset (CIDInit)")
    print("%%IncludeResource: procset (CIDInit)")
    print("%%%%BeginResource: CMap (%s)" % cmapname)
    print("%%%%Version: %s" % cmapversion)
    print("%%EndComments")
    print("/CIDInit /ProcSet findresource begin")
    print("12 dict begin")
    print("begincmap")
    if usecmap:
        print("/%s usecmap" % usecmap)
    print("/CIDSystemInfo 3 dict dup begin")
    print(" /Registry %s def" % csi_registry)
    print(" /Ordering %s def" % csi_ordering)
    print(" /Supplement %s def" % csi_supplement)
    print("end def")
    print("/CMapName /%s def" % cmapname)
    print("/CMapVersion %s def" % cmapversion)
    print("/CMapType 1 def")
    print("/WMode %d def" % wmode)

    if codespacerange:
        print("%d begincodespacerange" % len(codespacerange))
        for width, lo, hi in codespacerange:
            # Build e.g. "<%04x> <%04x>" for a range written with 4 digits.
            fmt = "<%%0%dx> <%%0%dx>" % (width, width)
            print(fmt % (lo, hi))
        print("endcodespacerange")

    # The cidchar section is always written, even when no mapping was read.
    print("%d begincidchar" % len(cidmap))
    for code, v in sorted(cidmap.items()):
        print("<%04x> %d" % (code, v))
    print("endcidchar")

    print("endcmap")
    print("CMapName currentdict /CMap defineresource pop")
    print("end")
    print("end")
    print("%%EndResource")
    print("%%EOF")
108
# Command-line driver: flatten each CMap file named on the command line, in
# the order given.  flattencmap() prints its result to stdout, so the outputs
# of several files follow one another on the same stream.
for arg in sys.argv[1:]:
    flattencmap(arg)