Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/scripts/cmapclean.py @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 #!/usr/bin/env python3 | |
| 2 | |
| 3 # Parse a CMap file and dump it back out. | |
| 4 | |
| 5 import sys | |
| 6 | |
| 7 # Decode a subset of CMap syntax (only what is needed for our built-in resources) | |
| 8 # We require that tokens are whitespace separated. | |
| 9 | |
def cleancmap(filename):
    """Parse the CMap file *filename* and print a normalized copy to stdout.

    Only a subset of CMap syntax is understood, and tokens must be separated
    by whitespace.  Lines whose first character is '%' are ignored.  All
    cid/bf mappings are expanded into a code -> value table, which is then
    re-compressed into single entries, consecutive ranges and one-to-many
    entries before being written out.

    Raises whatever open() raises for an unreadable file, ValueError for a
    malformed numeric token, and IndexError for a header key with no value.
    """
    codespacerange = []
    usecmap = ""
    cmapname = ""
    cmapversion = "1.0"
    csi_registry = "(Adobe)"
    csi_ordering = "(Unknown)"
    csi_supplement = 1
    wmode = 0
    isbf = False

    # code -> int (one-to-one mapping) or list of ints (one-to-many mapping)
    mapping = {}

    # Section-opening keyword -> name of the section it starts.
    sections = {
        'begincodespacerange': 'codespacerange',
        'begincidrange': 'cidrange',
        'beginbfrange': 'bfrange',
        'begincidchar': 'cidchar',
        'beginbfchar': 'bfchar',
    }

    def tocode(s):
        # "<hex>" tokens are hexadecimal, anything else is decimal.
        if s[0] == '<' and s[-1] == '>':
            return int(s[1:-1], 16)
        return int(s, 10)

    def map_cidchar(lo, v):
        mapping[lo] = v

    def map_cidrange(lo, hi, v):
        while lo <= hi:
            mapping[lo] = v
            lo = lo + 1
            v = v + 1

    def add_bf(lo, v):
        # Decode unicode surrogate pairs
        if len(v) == 2 and v[0] >= 0xd800 and v[0] <= 0xdbff and v[1] >= 0xdc00 and v[1] <= 0xdfff:
            mapping[lo] = ((v[0] - 0xd800) << 10) + (v[1] - 0xdc00) + 0x10000
        elif len(v) == 1:
            mapping[lo] = v[0]
        elif len(v) <= 8:
            # Store a copy: map_bfrange keeps mutating its list afterwards.
            mapping[lo] = v[:]
        else:
            print("/* warning: too long one-to-many mapping: %s */" % (v))

    def bf_units(bf):
        # Drop the surrounding < > and split into 4-hex-digit units.
        bf = bf[1:-1]
        return [int(bf[i:i+4], 16) for i in range(0, len(bf), 4)]

    def map_bfchar(lo, bf):
        add_bf(lo, bf_units(bf))

    def map_bfrange(lo, hi, bf):
        v = bf_units(bf)
        while lo <= hi:
            add_bf(lo, v)
            lo = lo + 1
            # Only the last unit is incremented across the range.
            v[-1] = v[-1] + 1

    current = None
    # The context manager guarantees the file is closed, also on errors.
    with open(filename, "r") as cmapfile:
        for raw in cmapfile:
            if raw[0] == '%':
                continue
            line = raw.split()
            if not line:
                continue
            key = line[0]
            if key == '/CMapVersion':
                cmapversion = line[1]
            elif key == '/CMapName':
                cmapname = line[1][1:]
            elif key == '/WMode':
                wmode = int(line[1])
            elif key == '/Registry':
                csi_registry = line[1]
            elif key == '/Ordering':
                csi_ordering = line[1]
            elif key == '/Supplement':
                csi_supplement = line[1]
            elif len(line) > 1 and line[1] == 'usecmap':
                usecmap = key[1:]
            elif len(line) > 1 and line[1] in sections:
                # "<count> beginXXX" form
                current = sections[line[1]]
                isbf = isbf or current in ('bfrange', 'bfchar')
            elif key in sections:
                # bare "beginXXX" form
                current = sections[key]
                isbf = isbf or current in ('bfrange', 'bfchar')
            elif key.startswith("end"):
                current = None
            elif current == 'codespacerange' and len(line) == 2:
                # Keep the hex-digit width as an int; it is used to pad the
                # codes when the range is printed.
                width = len(key) - 2
                codespacerange.append((width, tocode(key), tocode(line[1])))
            elif current == 'cidrange' and len(line) == 3:
                map_cidrange(tocode(key), tocode(line[1]), tocode(line[2]))
            elif current == 'cidchar' and len(line) == 2:
                map_cidchar(tocode(key), tocode(line[1]))
            elif current == 'bfchar' and len(line) == 2:
                map_bfchar(tocode(key), line[1])
            elif current == 'bfrange' and len(line) == 3:
                map_bfrange(tocode(key), tocode(line[1]), line[2])

    # Create ranges

    singles = []
    ranges = []
    mranges = []

    # A negative out_lo means "no run is currently open".
    out_lo = -100
    out_hi = -100
    out_v_lo = 0
    out_v_hi = 0

    def flush_range():
        # Reads the current out_* values of the enclosing scope at call time.
        if out_lo >= 0:
            if out_lo == out_hi:
                singles.append((out_lo, out_v_lo))
            else:
                ranges.append((out_lo, out_hi, out_v_lo))

    for code in sorted(mapping):
        v = mapping[code]
        if not isinstance(v, int):
            # One-to-many mappings never join a run.
            flush_range()
            out_lo = out_hi = -100
            mranges.append((code, v))
        elif code != out_hi + 1 or v != out_v_hi + 1:
            # Either the code or the value is not consecutive: start a new run.
            flush_range()
            out_lo = out_hi = code
            out_v_lo = out_v_hi = v
        else:
            out_hi = out_hi + 1
            out_v_hi = out_v_hi + 1
    flush_range()

    # Print CMap file

    print("%!PS-Adobe-3.0 Resource-CMap")
    print("%%DocumentNeededResources: procset (CIDInit)")
    print("%%IncludeResource: procset (CIDInit)")
    print("%%%%BeginResource: CMap (%s)" % cmapname)
    print("%%%%Version: %s" % cmapversion)
    print("%%EndComments")
    print("/CIDInit /ProcSet findresource begin")
    print("12 dict begin")
    print("begincmap")
    if usecmap:
        print("/%s usecmap" % usecmap)
    print("/CIDSystemInfo 3 dict dup begin")
    print(" /Registry %s def" % csi_registry)
    print(" /Ordering %s def" % csi_ordering)
    print(" /Supplement %s def" % csi_supplement)
    print("end def")
    print("/CMapName /%s def" % cmapname)
    print("/CMapVersion %s def" % cmapversion)
    print("/CMapType 1 def")
    print("/WMode %d def" % wmode)

    if codespacerange:
        print("%d begincodespacerange" % len(codespacerange))
        for width, lo, hi in codespacerange:
            fmt = "<%%0%dx> <%%0%dx>" % (width, width)
            print(fmt % (lo, hi))
        print("endcodespacerange")

    if singles:
        if isbf:
            print("%d beginbfchar" % len(singles))
            for s in singles:
                print("<%04x> <%04x>" % s)
            print("endbfchar")
        else:
            print("%d begincidchar" % len(singles))
            for s in singles:
                print("<%04x> %d" % s)
            print("endcidchar")

    if ranges:
        if isbf:
            print("%d beginbfrange" % len(ranges))
            for r in ranges:
                print("<%04x> <%04x> <%04x>" % r)
            print("endbfrange")
        else:
            print("%d begincidrange" % len(ranges))
            for r in ranges:
                print("<%04x> <%04x> %d" % r)
            print("endcidrange")

    if mranges:
        print("%d beginbfchar" % len(mranges))
        for cid, v in mranges:
            print("<%04x> <%s>" % (cid, "".join(["%04x" % ch for ch in v])))
        print("endbfchar")

    print("endcmap")
    print("CMapName currentdict /CMap defineresource pop")
    print("end")
    print("end")
    print("%%EndResource")
    print("%%EOF")
| 204 | |
# Clean every CMap file named on the command line, in the order given.
for cmap_path in sys.argv[1:]:
    cleancmap(cmap_path)
