diff mupdf-source/scripts/cmapflatten.py @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mupdf-source/scripts/cmapflatten.py	Mon Sep 15 11:43:07 2025 +0200
@@ -0,0 +1,110 @@
+#!/usr/bin/env python3
+
+# Parse a Uni* CMap file and flatten it.
+#
+# The Uni* CMap files only have 'cidchar' and 'cidrange' sections, never
+# 'bfchar' or 'bfrange'.
+
+import sys
+
+def flattencmap(filename):
+	"""Read the CMap resource *filename* and print a flattened copy to stdout.
+
+	Every 'cidrange' entry is expanded into individual code -> CID pairs and
+	merged with the 'cidchar' entries, so the output carries one single
+	'begincidchar' section sorted by code.  Header values that the input
+	does not define keep the defaults assigned below.
+
+	Raises whatever open() raises when the file cannot be read, ValueError
+	when a code or /WMode token is not a valid number, and IndexError when a
+	header key has no value on its line.  Returns None.
+	"""
+	codespacerange = []
+	usecmap = ""
+	cmapname = ""
+	cmapversion = "1.0"
+	csi_registry = "(Adobe)"
+	csi_ordering = "(Unknown)"
+	csi_supplement = 1
+	wmode = 0
+
+	# code -> CID; named 'cidmap' so the builtin map() is not shadowed.
+	cidmap = {}
+
+	def tocode(s):
+		# '<...>' tokens are hexadecimal, everything else is decimal.
+		if s[0] == '<' and s[-1] == '>':
+			return int(s[1:-1], 16)
+		return int(s, 10)
+
+	# Maps a 'begin...' operator to the name of the section it opens.
+	sections = {
+		'begincodespacerange': 'codespacerange',
+		'begincidrange': 'cidrange',
+		'begincidchar': 'cidchar',
+	}
+
+	current = None
+	# The context manager closes the file even when parsing raises.
+	with open(filename, "r") as f:
+		for line in f:
+			# Only lines whose very first character is '%' count as comments.
+			if line[0] == '%':
+				continue
+			line = line.split()
+			if not line:
+				continue
+			key = line[0]
+			if key == '/CMapVersion': cmapversion = line[1]
+			elif key == '/CMapName': cmapname = line[1][1:]
+			elif key == '/WMode': wmode = int(line[1])
+			elif key == '/Registry': csi_registry = line[1]
+			elif key == '/Ordering': csi_ordering = line[1]
+			elif key == '/Supplement': csi_supplement = line[1]
+			elif len(line) > 1 and line[1] == 'usecmap': usecmap = key[1:]
+			elif len(line) > 1 and line[1] in sections: current = sections[line[1]]
+			elif key.startswith("end"):
+				current = None
+			elif current == 'codespacerange' and len(line) == 2:
+				# Remember the digit count of the low bound (token length
+				# minus the two angle brackets) so the range is printed
+				# with the same zero-padded width it was read with.
+				codespacerange.append((len(key) - 2, tocode(key), tocode(line[1])))
+			elif current == 'cidrange' and len(line) == 3:
+				lo, hi, cid = tocode(line[0]), tocode(line[1]), tocode(line[2])
+				# Consecutive codes map to consecutive CIDs starting at 'cid'.
+				for code in range(lo, hi + 1):
+					cidmap[code] = cid + (code - lo)
+			elif current == 'cidchar' and len(line) == 2:
+				code, cid = tocode(line[0]), tocode(line[1])
+				cidmap[code] = cid
+
+	# Print flattened CMap file
+
+	print("%!PS-Adobe-3.0 Resource-CMap")
+	print("%%DocumentNeededResources: procset (CIDInit)")
+	print("%%IncludeResource: procset (CIDInit)")
+	print("%%%%BeginResource: CMap (%s)" % cmapname)
+	print("%%%%Version: %s" % cmapversion)
+	print("%%EndComments")
+	print("/CIDInit /ProcSet findresource begin")
+	print("12 dict begin")
+	print("begincmap")
+	if usecmap: print("/%s usecmap" % usecmap)
+	print("/CIDSystemInfo 3 dict dup begin")
+	print("  /Registry %s def" % csi_registry)
+	print("  /Ordering %s def" % csi_ordering)
+	print("  /Supplement %s def" % csi_supplement)
+	print("end def")
+	print("/CMapName /%s def" % cmapname)
+	print("/CMapVersion %s def" % cmapversion)
+	print("/CMapType 1 def")
+	print("/WMode %d def" % wmode)
+
+	if codespacerange:
+		print("%d begincodespacerange" % len(codespacerange))
+		for digits, lo, hi in codespacerange:
+			fmt = "<%%0%dx> <%%0%dx>" % (digits, digits)
+			print(fmt % (lo, hi))
+		print("endcodespacerange")
+
+	# NOTE(review): codes are always printed with at least four hex digits,
+	# independent of the codespace widths read above -- confirm that the
+	# consumers of this output accept that.
+	print("%d begincidchar" % len(cidmap))
+	for code in sorted(cidmap):
+		print("<%04x> %d" % (code, cidmap[code]))
+	print("endcidchar")
+
+	print("endcmap")
+	print("CMapName currentdict /CMap defineresource pop")
+	print("end")
+	print("end")
+	print("%%EndResource")
+	print("%%EOF")
+
+# Flatten every CMap file named on the command line, in argument order.
+# The guard keeps a plain import of this module from processing sys.argv.
+if __name__ == "__main__":
+	for arg in sys.argv[1:]:
+		flattencmap(arg)