diff mupdf-source/thirdparty/harfbuzz/src/gen-emoji-table.py @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mupdf-source/thirdparty/harfbuzz/src/gen-emoji-table.py	Mon Sep 15 11:43:07 2025 +0200
@@ -0,0 +1,98 @@
+#!/usr/bin/env python3
+
+"""usage: ./gen-emoji-table.py emoji-data.txt emoji-test.txt
+
+Input files:
+* https://www.unicode.org/Public/UCD/latest/ucd/emoji/emoji-data.txt
+* https://www.unicode.org/Public/emoji/latest/emoji-test.txt
+"""
+
+import sys
+from collections import OrderedDict
+import packTab
+
+# Both input files are mandatory; otherwise print the usage text and exit.
+if len (sys.argv) != 3:
+	sys.exit (__doc__)
+
+# Property name -> list of inclusive (start, end) code point ranges, with
+# adjacent ranges of the same property merged into one.
+ranges = OrderedDict()
+# Unicode data files are UTF-8: decode explicitly instead of trusting the
+# locale default, and let the with-block close the handle when parsing ends.
+with open(sys.argv[1], encoding="utf-8") as f:
+	header = [f.readline () for _ in range(10)]  # echoed into the output
+	for line in f:
+		line = line.strip()
+		if not line or line.startswith('#'):
+			continue
+		# Fields before any '#' comment: "START[..END] ; Property".
+		rang, typ = [s.strip() for s in line.split('#')[0].split(';')[:2]]
+		rang = [int(s, 16) for s in rang.split('..')]
+		# A lone code point CP stands for the range CP..CP.
+		start, end = rang if len(rang) > 1 else rang * 2
+		merged = ranges.setdefault(typ, [])
+		if merged and merged[-1][1] == start - 1:
+			merged[-1] = (merged[-1][0], end)
+		else:
+			merged.append((start, end))
+
+
+
+# Opening banner and provenance comment of the generated C header.
+for banner_line in (
+	"/* == Start of generated table == */",
+	"/*",
+	" * The following tables are generated by running:",
+	" *",
+	" *   ./gen-emoji-table.py emoji-data.txt",  # NOTE(review): omits the emoji-test.txt argument the script requires
+	" *",
+	" * on file with this header:",
+	" *",
+):
+	print (banner_line)
+for header_line in header:  # first lines of the data file, one per row
+	print (" * " + header_line.strip())
+print (" */\n")  # end of comment plus one blank line
+print ("#ifndef HB_UNICODE_EMOJI_TABLE_HH\n#define HB_UNICODE_EMOJI_TABLE_HH\n")
+print ('#include "hb-unicode.hh"\n')
+
+# Emit a packed lookup function, but only for Extended_Pictographic.
+for prop, spans in ranges.items():
+	if prop == "Extended_Pictographic":
+		# One entry per code point covered by the inclusive ranges.
+		# NOTE(review): the 0 passed to pack_table is presumably the value
+		# for code points not listed -- confirm against the packTab docs.
+		members = {cp: 1 for lo, hi in spans for cp in range(lo, hi + 1)}
+		packed = packTab.pack_table(members, 0, compression=9)
+		emitter = packTab.Code('_hb_emoji')
+		packed.genCode(emitter, 'is_' + prop)
+		emitter.print_c(linkage='static inline')
+		print()
+
+# Close the include guard, then mark the end of the generated section.
+print ()
+print ("#endif /* HB_UNICODE_EMOJI_TABLE_HH */")
+print ()
+print ("/* == End of generated table == */")
+
+
+# Generate test file: one shaping test per multi-code-point sequence.
+sequences = []
+# Unicode data files are UTF-8: decode explicitly instead of relying on
+# the locale's default encoding, which may fail on non-ASCII comment text.
+with open(sys.argv[2], encoding="utf-8") as f:
+    for line in f:
+        # Keep only the code point field: drop the "# comment" and the
+        # "; ..." parts, then split on any run of whitespace.
+        sequence = line.partition("#")[0].partition(";")[0].split()
+        # Lines with fewer than two code points are not sequences; skip them.
+        if len(sequence) >= 2:
+            sequences.append(sequence)
+
+# The output path is relative to the current working directory.
+with open("../test/shape/data/in-house/tests/emoji-clusters.tests", "w") as f:
+    for sequence in sequences:
+        f.write("../fonts/AdobeBlank2.ttf;--no-glyph-names --no-positions --font-funcs=ot")
+        f.write(";" + ",".join(sequence))
+        f.write(";[" + "|".join(["1=0"] * len(sequence)) + "]\n")