diff mupdf-source/thirdparty/mujs/genucd.py @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mupdf-source/thirdparty/mujs/genucd.py	Mon Sep 15 11:43:07 2025 +0200
@@ -0,0 +1,79 @@
+# Generate utfdata.h from UnicodeData.txt (usage: python genucd.py UnicodeData.txt > utfdata.h)
+
+import sys
+
+# Tables filled while parsing; the dump functions below read them.
+tolower = []	# (code, lowercase code) pairs
+toupper = []	# (code, uppercase code) pairs
+isalpha = []	# code points whose general category starts with 'L'
+
+# Parse the semicolon-separated records of the file named by argv[1].
+# The file is opened in a with-block so the handle is closed when done.
+with open(sys.argv[1]) as ucd:
+	for line in ucd:
+		if not line.strip():
+			continue # tolerate blank lines (e.g. at the end of the file)
+		line = line.split(";")
+		code = int(line[0],16)
+		# if code > 65535: continue # skip non-BMP codepoints
+		# field 2: general category; letter categories start with 'L'
+		if line[2][0] == 'L':
+			isalpha.append(code)
+		# field 12: uppercase mapping as hex, empty when there is none
+		if line[12]:
+			toupper.append((code,int(line[12],16)))
+		# field 13: lowercase mapping as hex, empty when there is none
+		if line[13]:
+			tolower.append((code,int(line[13],16)))
+
+def dumpalpha():
+	"""Print the C tables of alphabetic code points to stdout.
+
+	Reads the module-level isalpha list and groups it into runs of
+	consecutive code points. Runs of two or more code points are printed
+	in ucd_alpha2 as "first,last," pairs; isolated code points are
+	printed in ucd_alpha1. All values are printed in hex.
+
+	Run detection only compares each entry with the previous one, so the
+	list is assumed to be in ascending order (TODO confirm for the input
+	file in use). An empty list produces two empty tables.
+	"""
+	table = []
+	# None marks "no run open yet"; 0 cannot be used because it is a
+	# valid code point.
+	start = None
+	prev = None
+	for code in isalpha:
+		if start is None:
+			start = code
+		elif code != prev+1:
+			# gap found: close the current run and open a new one
+			table.append((start,prev))
+			start = code
+		prev = code
+	if start is not None:
+		table.append((start,prev))
+
+	print("")
+	print("static const Rune ucd_alpha2[] = {")
+	for a, b in table:
+		if b - a > 0:
+			print(hex(a)+","+hex(b)+",")
+	print("};")
+
+	print("")
+	print("static const Rune ucd_alpha1[] = {")
+	for a, b in table:
+		if b - a == 0:
+			print(hex(a)+",")
+	print("};")
+
+def dumpmap(name, input):
+	"""Print the C tables for a code point mapping to stdout.
+
+	name  -- prefix of the generated arrays (name + "2" and name + "1").
+	input -- iterable of (code, mapped code) pairs.
+
+	Consecutive pairs in which both the code and the mapped code increase
+	by exactly one are merged into a run. Runs of two or more pairs are
+	printed in the "2" table as "first,last,delta,"; single pairs are
+	printed in the "1" table as "code,delta,". Codes are printed in hex
+	and delta (mapped code minus code) in decimal.
+
+	Run detection only compares each pair with the previous one. An empty
+	input produces two empty tables.
+	"""
+	table = []
+	# None marks "no run open yet"; 0 cannot be used because it is a
+	# valid code point.
+	start_a = None
+	start_b = None
+	prev_a = None
+	prev_b = None
+	for a, b in input:
+		if start_a is None:
+			start_a = a
+			start_b = b
+		elif a != prev_a+1 or b != prev_b+1:
+			# gap in either sequence: close the current run and open a new one
+			table.append((start_a,prev_a,start_b))
+			start_a = a
+			start_b = b
+		prev_a = a
+		prev_b = b
+	if start_a is not None:
+		table.append((start_a,prev_a,start_b))
+
+	print("")
+	print("static const Rune " + name + "2[] = {")
+	for a, b, n in table:
+		if b - a > 0:
+			print(hex(a)+","+hex(b)+","+str(n-a)+",")
+	print("};")
+
+	print("")
+	print("static const Rune " + name + "1[] = {")
+	for a, b, n in table:
+		if b - a == 0:
+			print(hex(a)+","+str(n-a)+",")
+	print("};")
+
+# Emit the header comment naming the input file, then the alphabetic
+# tables followed by the lower- and upper-case mapping tables.
+source = sys.argv[1]
+print("/* This file was automatically created from " + source + " */")
+dumpalpha()
+for prefix, pairs in (("ucd_tolower", tolower), ("ucd_toupper", toupper)):
+	dumpmap(prefix, pairs)