comparison mupdf-source/scripts/cmapdump.py @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 #!/usr/bin/env python3
2
3 # Parse a CMap file and dump it as a C struct.
4
5 import sys
6
7 # Decode a subset of CMap syntax (only what is needed for our built-in resources)
8 # We require that tokens are whitespace separated.
9
def dumpcmap(filename):
    """Parse the CMap file *filename* and print it to stdout as C source.

    Only a subset of CMap syntax is understood, and tokens must be separated
    by whitespace.  The file is read line by line; lines whose first
    character is '%' are skipped.  Recognised content is the /CMapName and
    /WMode entries, a "usecmap" reference, and the codespacerange, cidrange,
    cidchar, bfrange and bfchar sections.

    The output consists of optional pdf_range / pdf_xrange / pdf_mrange
    arrays (plus an int table holding one-to-many data) followed by a
    pdf_cmap struct initialiser referencing them.  One-to-many mappings
    longer than 8 values are dropped and reported as a C comment in the
    output.
    """
    codespacerange = []
    usecmap = ""
    cmapname = ""
    wmode = 0

    # Source code -> single int target, or a list of ints for one-to-many.
    mapping = {}

    # Section-opening keyword -> name of the section that follows it.
    sections = {
        'begincodespacerange': 'codespacerange',
        'begincidrange': 'cidrange',
        'beginbfrange': 'bfrange',
        'begincidchar': 'cidchar',
        'beginbfchar': 'bfchar',
    }

    def tocode(s):
        # <hex> tokens are hexadecimal; anything else is parsed as decimal.
        if s[0] == '<' and s[-1] == '>':
            return int(s[1:-1], 16)
        return int(s, 10)

    def decode_bf(bf):
        # Split the hex digits between < and > into 4-digit (16-bit) values.
        digits = bf[1:-1]
        return [int(digits[i:i + 4], 16) for i in range(0, len(digits), 4)]

    def map_cidchar(lo, v):
        mapping[lo] = v

    def map_cidrange(lo, hi, v):
        # Consecutive codes lo..hi map to consecutive values starting at v.
        for offset in range(hi - lo + 1):
            mapping[lo + offset] = v + offset

    def add_bf(lo, v):
        # Decode unicode surrogate pairs
        if len(v) == 2 and 0xd800 <= v[0] <= 0xdbff and 0xdc00 <= v[1] <= 0xdfff:
            mapping[lo] = ((v[0] - 0xd800) << 10) + (v[1] - 0xdc00) + 0x10000
        elif len(v) == 1:
            mapping[lo] = v[0]
        elif len(v) <= 8:
            # Copy: map_bfrange keeps mutating its list between calls.
            mapping[lo] = v[:]
        else:
            print("/* warning: too long one-to-many mapping: %s */" % (v))

    def map_bfchar(lo, bf):
        add_bf(lo, decode_bf(bf))

    def map_bfrange(lo, hi, bf):
        v = decode_bf(bf)
        while lo <= hi:
            add_bf(lo, v)
            lo = lo + 1
            # Only the last value of the sequence advances along the range.
            v[-1] = v[-1] + 1

    current = None
    # The context manager closes the file even if parsing raises.
    with open(filename, "r") as cmapfile:
        for line in cmapfile:
            if line[0] == '%':
                continue
            line = line.strip().split()
            if len(line) == 0:
                continue
            if line[0] == '/CMapName':
                cmapname = line[1][1:]  # drop the leading '/'
            elif line[0] == '/WMode':
                wmode = int(line[1])
            elif len(line) > 1 and line[1] == 'usecmap':
                usecmap = line[0][1:]  # drop the leading '/'
            elif len(line) > 1 and line[1] in sections:
                # "<count> beginXXX" form
                current = sections[line[1]]
            elif line[0] in sections:
                # bare "beginXXX" form
                current = sections[line[0]]
            elif line[0].startswith("end"):
                current = None
            elif current == 'codespacerange' and len(line) == 2:
                # Byte length of the code: two hex digits per byte, minus the
                # < > delimiters.  Floor division keeps it an int.
                n = (len(line[0]) - 2) // 2
                codespacerange.append((n, tocode(line[0]), tocode(line[1])))
            elif current == 'cidrange' and len(line) == 3:
                map_cidrange(tocode(line[0]), tocode(line[1]), tocode(line[2]))
            elif current == 'cidchar' and len(line) == 2:
                map_cidchar(tocode(line[0]), tocode(line[1]))
            elif current == 'bfchar' and len(line) == 2:
                map_bfchar(tocode(line[0]), line[1])
            elif current == 'bfrange' and len(line) == 3:
                map_bfrange(tocode(line[0]), tocode(line[1]), line[2])

    # Create ranges

    ranges = []   # (lo, hi, value) where everything fits in 16 bits
    xranges = []  # (lo, hi, value) where some member exceeds 0xffff
    mranges = []  # (code, offset into mdata) for one-to-many mappings
    mdata = []    # flat table: length followed by that many values, repeated

    def flush_range(lo, hi, v_lo):
        # Emit the pending run, if there is one (lo < 0 means "no run").
        if lo >= 0:
            if lo > 0xffff or hi > 0xffff or v_lo > 0xffff:
                xranges.append((lo, hi, v_lo))
            else:
                ranges.append((lo, hi, v_lo))

    out_lo = out_hi = -100
    out_v_lo = out_v_hi = 0

    for code in sorted(mapping):
        v = mapping[code]
        if not isinstance(v, int):
            # One-to-many entry: terminates any pending run.
            flush_range(out_lo, out_hi, out_v_lo)
            out_lo = out_hi = -100
            mranges.append((code, len(mdata)))
            mdata.append(len(v))
            mdata.extend(v)
        elif code != out_hi + 1 or v != out_v_hi + 1:
            # Not contiguous with the pending run: start a new one.
            flush_range(out_lo, out_hi, out_v_lo)
            out_lo = out_hi = code
            out_v_lo = out_v_hi = v
        else:
            out_hi = out_hi + 1
            out_v_hi = out_v_hi + 1
    flush_range(out_lo, out_hi, out_v_lo)

    # Print C file

    cname = cmapname.replace('-', '_')

    print()
    print("/*", cmapname, "*/")
    print()

    if len(ranges) > 0:
        print("static const pdf_range cmap_%s_ranges[] = {" % cname)
        for r in ranges:
            print("{0x%x,0x%x,0x%x}," % r)
        print("};")
        print()
    if len(xranges) > 0:
        print("static const pdf_xrange cmap_%s_xranges[] = {" % cname)
        for r in xranges:
            print("{0x%x,0x%x,0x%x}," % r)
        print("};")
        print()
    if len(mranges) > 0:
        print("static const pdf_mrange cmap_%s_mranges[] = {" % cname)
        for r in mranges:
            print("{0x%x,0x%x}," % r)
        print("};")
        print()
        print("static const int cmap_%s_table[] = {" % cname)
        # One output line per record: the length word followed by its values.
        n = mdata[0]
        i = 0
        for r in mdata:
            if i <= n:
                sys.stdout.write("0x%x," % r)
                i = i + 1
            else:
                # r is the length word of the next record.
                sys.stdout.write("\n0x%x," % r)
                i = 1
                n = r
        sys.stdout.write("\n")
        print("};")
        print()

    print("static pdf_cmap cmap_%s = {" % cname)
    print("\t{ -1, pdf_drop_cmap_imp },")
    print("\t/* cmapname */ \"%s\"," % cmapname)
    print("\t/* usecmap */ \"%s\", NULL," % usecmap)
    print("\t/* wmode */ %d," % wmode)
    print("\t/* codespaces */ %d, {" % len(codespacerange))
    if len(codespacerange) > 0:
        for codespace in codespacerange:
            # Zero-pad the low/high codes to two hex digits per byte.
            fmt = "\t\t{ %%d, 0x%%0%dx, 0x%%0%dx }," % (codespace[0]*2, codespace[0]*2)
            print(fmt % codespace)
    else:
        print("\t\t{ 0, 0, 0 },")
    print("\t},")

    if len(ranges) > 0:
        print("\t%d, %d, (pdf_range*)cmap_%s_ranges," % (len(ranges),len(ranges),cname))
    else:
        print("\t0, 0, NULL, /* ranges */")

    if len(xranges) > 0:
        print("\t%d, %d, (pdf_xrange*)cmap_%s_xranges," % (len(xranges),len(xranges),cname))
    else:
        print("\t0, 0, NULL, /* xranges */")

    if len(mranges) > 0:
        print("\t%d, %d, (pdf_mrange*)cmap_%s_mranges," % (len(mranges),len(mranges),cname))
    else:
        print("\t0, 0, NULL, /* mranges */")

    if len(mdata) > 0:
        print("\t%d, %d, (int*)cmap_%s_table," % (len(mdata),len(mdata),cname))
    else:
        print("\t0, 0, NULL, /* table */")

    print("\t0, 0, 0, NULL /* splay tree */")
    print("};")
213
def main(argv=None):
    """Print the generated-file banner, then dump every CMap file in *argv*.

    *argv* is a list of CMap file paths; when it is None the paths are taken
    from the command line (sys.argv[1:]).  Each path is passed to dumpcmap()
    in order, so all output goes to stdout.
    """
    if argv is None:
        argv = sys.argv[1:]

    print("/* This is an automatically generated file. Do not edit. */")

    for arg in argv:
        dumpcmap(arg)


# Only run when executed as a script, so importing the module has no side effects.
if __name__ == "__main__":
    main()