Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/scripts/cmapflatten.py @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children | |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 #!/usr/bin/env python3 | |
| 2 | |
| 3 # Parse a Uni* CMap file and flatten it. | |
| 4 # | |
| 5 # The Uni* CMap files only have 'cidchar' and 'cidrange' sections, never | |
| 6 # 'bfchar' or 'bfrange'. | |
| 7 | |
| 8 import sys | |
| 9 | |
def flattencmap(filename):
    """Read the CMap file *filename* and print a flattened copy to stdout.

    The file is scanned line by line.  The header values (/CMapName,
    /CMapVersion, /WMode, /Registry, /Ordering, /Supplement), an optional
    ``usecmap`` reference and the codespace ranges are carried over to the
    output.  Every ``cidrange`` entry is expanded into one mapping per code,
    merged with the ``cidchar`` entries (a later entry for the same code
    overwrites an earlier one), and the result is written as a single
    ``begincidchar`` section sorted by code.

    Lines whose first character is '%' are ignored, as are blank lines and
    anything that is not recognised.

    Raises whatever ``open`` raises if the file cannot be read, IndexError
    if a header keyword has no value on its line, and ValueError if a code
    or CID token is not a valid number.
    """
    codespacerange = []
    usecmap = ""
    cmapname = ""
    cmapversion = "1.0"
    csi_registry = "(Adobe)"
    csi_ordering = "(Unknown)"
    csi_supplement = 1
    wmode = 0

    # Character code -> CID.  Deliberately not called "map" so the builtin
    # of that name is not shadowed.
    cid_by_code = {}

    def tocode(s):
        # '<hex>' tokens are hexadecimal, anything else is decimal.
        if s[0] == '<' and s[-1] == '>':
            return int(s[1:-1], 16)
        return int(s, 10)

    # Name of the section currently being read, or None outside sections.
    current = None

    # The context manager closes the file even if parsing raises.
    with open(filename, "r") as cmap_file:
        for raw_line in cmap_file:
            # Comment test is done on the raw line, before stripping.
            if raw_line[0] == '%':
                continue
            line = raw_line.strip().split()
            if not line:
                continue

            keyword = line[0]
            operator = line[1] if len(line) > 1 else None

            if keyword == '/CMapVersion':
                cmapversion = line[1]
            elif keyword == '/CMapName':
                # Drop the leading '/' of the name token.
                cmapname = line[1][1:]
            elif keyword == '/WMode':
                wmode = int(line[1])
            elif keyword == '/Registry':
                csi_registry = line[1]
            elif keyword == '/Ordering':
                csi_ordering = line[1]
            elif keyword == '/Supplement':
                csi_supplement = line[1]
            elif operator == 'usecmap':
                usecmap = keyword[1:]
            elif operator == 'begincodespacerange':
                current = 'codespacerange'
            elif operator == 'begincidrange':
                current = 'cidrange'
            elif operator == 'begincidchar':
                current = 'cidchar'
            elif keyword.startswith("end"):
                current = None
            elif current == 'codespacerange' and len(line) == 2:
                # Remember how many hex digits the range was written with
                # (token length minus the two angle brackets) so that it
                # is printed back with the same zero padding.  Kept as an
                # int; no float division involved.
                digits = len(line[0]) - 2
                codespacerange.append((digits, tocode(line[0]), tocode(line[1])))
            elif current == 'cidrange' and len(line) == 3:
                lo, hi, cid = tocode(line[0]), tocode(line[1]), tocode(line[2])
                # Consecutive codes lo..hi map to consecutive CIDs.
                for offset in range(hi - lo + 1):
                    cid_by_code[lo + offset] = cid + offset
            elif current == 'cidchar' and len(line) == 2:
                cid_by_code[tocode(line[0])] = tocode(line[1])

    # Print flattened CMap file

    print("%!PS-Adobe-3.0 Resource-CMap")
    # No % operator on the next two lines, so "%%" is printed literally.
    print("%%DocumentNeededResources: procset (CIDInit)")
    print("%%IncludeResource: procset (CIDInit)")
    print("%%%%BeginResource: CMap (%s)" % cmapname)
    print("%%%%Version: %s" % cmapversion)
    print("%%EndComments")
    print("/CIDInit /ProcSet findresource begin")
    print("12 dict begin")
    print("begincmap")
    if usecmap:
        print("/%s usecmap" % usecmap)
    print("/CIDSystemInfo 3 dict dup begin")
    print(" /Registry %s def" % csi_registry)
    print(" /Ordering %s def" % csi_ordering)
    print(" /Supplement %s def" % csi_supplement)
    print("end def")
    print("/CMapName /%s def" % cmapname)
    print("/CMapVersion %s def" % cmapversion)
    print("/CMapType 1 def")
    print("/WMode %d def" % wmode)

    if codespacerange:
        print("%d begincodespacerange" % len(codespacerange))
        for digits, lo, hi in codespacerange:
            # Build e.g. "<%04x> <%04x>" for a range written with 4 digits.
            fmt = "<%%0%dx> <%%0%dx>" % (digits, digits)
            print(fmt % (lo, hi))
        print("endcodespacerange")

    # One cidchar section holding every mapping, in ascending code order.
    # Codes are always padded to at least four hex digits here.
    print("%d begincidchar" % len(cid_by_code))
    for code in sorted(cid_by_code):
        print("<%04x> %d" % (code, cid_by_code[code]))
    print("endcidchar")

    print("endcmap")
    print("CMapName currentdict /CMap defineresource pop")
    print("end")
    print("end")
    print("%%EndResource")
    print("%%EOF")
| 108 | |
# Flatten each CMap file named on the command line, in the order given.
cmap_paths = sys.argv[1:]
for cmap_path in cmap_paths:
    flattencmap(cmap_path)
