Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/scripts/cmapdump.py @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children | |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 #!/usr/bin/env python3 | |
| 2 | |
| 3 # Parse a CMap file and dump it as a C struct. | |
| 4 | |
| 5 import sys | |
| 6 | |
| 7 # Decode a subset of CMap syntax (only what is needed for our built-in resources) | |
| 8 # We require that tokens are whitespace separated. | |
| 9 | |
# Maps each "begin..." section keyword to the parser state it opens.
_CMAP_SECTIONS = {
    'begincodespacerange': 'codespacerange',
    'begincidrange': 'cidrange',
    'beginbfrange': 'bfrange',
    'begincidchar': 'cidchar',
    'beginbfchar': 'bfchar',
}


def _parse_cmap(lines):
    """Parse CMap source lines.

    Only the subset of CMap syntax handled here is recognised, and tokens
    must be whitespace separated.

    Returns a tuple ``(cmapname, usecmap, wmode, codespacerange, mapping)``
    where ``codespacerange`` is a list of ``(nbytes, lo, hi)`` tuples and
    ``mapping`` maps each source code to either an int (one-to-one) or a
    list of ints (one-to-many).

    A one-to-many mapping longer than 8 values is not stored; a C comment
    warning is printed to stdout instead.
    """
    codespacerange = []
    usecmap = ""
    cmapname = ""
    wmode = 0
    mapping = {}

    def tocode(s):
        # "<hex>" tokens are hexadecimal, anything else is decimal.
        if s[0] == '<' and s[-1] == '>':
            return int(s[1:-1], 16)
        return int(s, 10)

    def parse_bf(bf):
        # Drop the surrounding < > and split into 4-hex-digit units.
        bf = bf[1:-1]
        return [int(bf[i:i + 4], 16) for i in range(0, len(bf), 4)]

    def add_bf(lo, v):
        # Decode unicode surrogate pairs
        if len(v) == 2 and 0xd800 <= v[0] <= 0xdbff and 0xdc00 <= v[1] <= 0xdfff:
            mapping[lo] = ((v[0] - 0xd800) << 10) + (v[1] - 0xdc00) + 0x10000
        elif len(v) == 1:
            mapping[lo] = v[0]
        elif len(v) <= 8:
            # Copy: the caller may keep mutating v (see bfrange below).
            mapping[lo] = v[:]
        else:
            print("/* warning: too long one-to-many mapping: %s */" % (v,))

    current = None
    for raw in lines:
        # Comment detection happens before stripping, so only a '%' in the
        # very first column marks a comment line.
        if raw.startswith('%'):
            continue
        line = raw.strip().split()
        if not line:
            continue
        if line[0] == '/CMapName':
            cmapname = line[1][1:]
        elif line[0] == '/WMode':
            wmode = int(line[1])
        elif len(line) > 1 and line[1] == 'usecmap':
            usecmap = line[0][1:]
        elif len(line) > 1 and line[1] in _CMAP_SECTIONS:
            # "<count> beginXXX" form.
            current = _CMAP_SECTIONS[line[1]]
        elif line[0] in _CMAP_SECTIONS:
            current = _CMAP_SECTIONS[line[0]]
        elif line[0].startswith("end"):
            current = None
        elif current == 'codespacerange' and len(line) == 2:
            # Integer division: the byte count is later used as a format
            # width and as a %d value, so it must stay an int.
            n = (len(line[0]) - 2) // 2
            codespacerange.append((n, tocode(line[0]), tocode(line[1])))
        elif current == 'cidrange' and len(line) == 3:
            lo, hi, v = tocode(line[0]), tocode(line[1]), tocode(line[2])
            for offset in range(hi - lo + 1):
                mapping[lo + offset] = v + offset
        elif current == 'cidchar' and len(line) == 2:
            mapping[tocode(line[0])] = tocode(line[1])
        elif current == 'bfchar' and len(line) == 2:
            add_bf(tocode(line[0]), parse_bf(line[1]))
        elif current == 'bfrange' and len(line) == 3:
            lo, hi = tocode(line[0]), tocode(line[1])
            v = parse_bf(line[2])
            while lo <= hi:
                add_bf(lo, v)
                lo += 1
                # Only the last unit of the target is incremented.
                v[-1] += 1

    return cmapname, usecmap, wmode, codespacerange, mapping


def _build_ranges(mapping):
    """Coalesce a code mapping into range tables.

    Returns ``(ranges, xranges, mranges, mdata)``: consecutive one-to-one
    mappings are merged into ``(lo, hi, value_lo)`` tuples, placed in
    ``xranges`` when any member exceeds 0xffff and in ``ranges`` otherwise.
    One-to-many mappings become ``(code, offset)`` entries in ``mranges``
    with their length-prefixed values appended to ``mdata``.
    """
    ranges = []
    xranges = []
    mranges = []
    mdata = []

    def flush(lo, hi, v_lo):
        # A negative lo means "no range currently open".
        if lo >= 0:
            if lo > 0xffff or hi > 0xffff or v_lo > 0xffff:
                xranges.append((lo, hi, v_lo))
            else:
                ranges.append((lo, hi, v_lo))

    out_lo = out_hi = -100
    out_v_lo = out_v_hi = 0

    for code in sorted(mapping):
        v = mapping[code]
        if not isinstance(v, int):
            # One-to-many mapping: close the open range and record the
            # values in the side table.
            flush(out_lo, out_hi, out_v_lo)
            out_lo = out_hi = -100
            mranges.append((code, len(mdata)))
            mdata.append(len(v))
            mdata.extend(v)
        elif code != out_hi + 1 or v != out_v_hi + 1:
            # Not contiguous with the open range: start a new one.
            flush(out_lo, out_hi, out_v_lo)
            out_lo = out_hi = code
            out_v_lo = out_v_hi = v
        else:
            out_hi += 1
            out_v_hi += 1
    flush(out_lo, out_hi, out_v_lo)

    return ranges, xranges, mranges, mdata


def dumpcmap(filename):
    """Parse the CMap file *filename* and print it to stdout as C source.

    The output consists of optional ``pdf_range``, ``pdf_xrange`` and
    ``pdf_mrange`` arrays (plus the int table backing the mranges),
    followed by a ``pdf_cmap`` struct initialiser that references them.

    Raises whatever ``open`` raises if the file cannot be read, and
    ``ValueError``/``IndexError`` on tokens the parser cannot decode.
    """
    # Read everything up front and close the file before any output is
    # produced; the original left the handle open until garbage collection.
    with open(filename, "r") as f:
        lines = f.readlines()

    cmapname, usecmap, wmode, codespacerange, mapping = _parse_cmap(lines)
    ranges, xranges, mranges, mdata = _build_ranges(mapping)

    # Print C file

    cname = cmapname.replace('-', '_')

    print()
    print("/*", cmapname, "*/")
    print()

    if len(ranges) > 0:
        print("static const pdf_range cmap_%s_ranges[] = {" % cname)
        for r in ranges:
            print("{0x%x,0x%x,0x%x}," % r)
        print("};")
        print()
    if len(xranges) > 0:
        print("static const pdf_xrange cmap_%s_xranges[] = {" % cname)
        for r in xranges:
            print("{0x%x,0x%x,0x%x}," % r)
        print("};")
        print()
    if len(mranges) > 0:
        print("static const pdf_mrange cmap_%s_mranges[] = {" % cname)
        for r in mranges:
            print("{0x%x,0x%x}," % r)
        print("};")
        print()
        print("static const int cmap_%s_table[] = {" % cname)
        # Emit one length-prefixed entry per output line: n is the length
        # of the current entry and i counts the values written for it.
        n = mdata[0]
        i = 0
        for r in mdata:
            if i <= n:
                sys.stdout.write("0x%x," % r)
                i = i + 1
            else:
                sys.stdout.write("\n0x%x," % r)
                i = 1
                n = r
        sys.stdout.write("\n")
        print("};")
        print()

    print("static pdf_cmap cmap_%s = {" % cname)
    print("\t{ -1, pdf_drop_cmap_imp },")
    print("\t/* cmapname */ \"%s\"," % cmapname)
    print("\t/* usecmap */ \"%s\", NULL," % usecmap)
    print("\t/* wmode */ %d," % wmode)
    print("\t/* codespaces */ %d, {" % len(codespacerange))
    if len(codespacerange) > 0:
        for codespace in codespacerange:
            # Pad lo/hi to two hex digits per codespace byte.
            fmt = "\t\t{ %%d, 0x%%0%dx, 0x%%0%dx }," % (codespace[0]*2, codespace[0]*2)
            print(fmt % codespace)
    else:
        print("\t\t{ 0, 0, 0 },")
    print("\t},")

    if len(ranges) > 0:
        print("\t%d, %d, (pdf_range*)cmap_%s_ranges," % (len(ranges),len(ranges),cname))
    else:
        print("\t0, 0, NULL, /* ranges */")

    if len(xranges) > 0:
        print("\t%d, %d, (pdf_xrange*)cmap_%s_xranges," % (len(xranges),len(xranges),cname))
    else:
        print("\t0, 0, NULL, /* xranges */")

    if len(mranges) > 0:
        print("\t%d, %d, (pdf_mrange*)cmap_%s_mranges," % (len(mranges),len(mranges),cname))
    else:
        print("\t0, 0, NULL, /* mranges */")

    if len(mdata) > 0:
        print("\t%d, %d, (int*)cmap_%s_table," % (len(mdata),len(mdata),cname))
    else:
        print("\t0, 0, NULL, /* table */")

    print("\t0, 0, 0, NULL /* splay tree */")
    print("};")
| 213 | |
# Emit the generated-file banner once, ahead of every dumped CMap.
sys.stdout.write("/* This is an automatically generated file. Do not edit. */\n")

# Dump each CMap file named on the command line, in argument order.
for arg in sys.argv[1:]:
    dumpcmap(arg)
