comparison mupdf-source/scripts/genucd.py @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 # Create utfdata.h from UnicodeData.txt
2
# Accumulators filled while scanning UnicodeData.txt.
tolower = []  # (code, lowercase code) pairs, from field 13
toupper = []  # (code, uppercase code) pairs, from field 12
isalpha = []  # code points whose field 2 starts with 'L'

# Use a context manager so the file handle is closed deterministically, and
# iterate the file lazily instead of materialising it with readlines().
with open("UnicodeData.txt") as ucd_file:
    for line in ucd_file:
        if not line.strip():
            continue  # tolerate blank lines instead of crashing in int()
        line = line.split(";")
        code = int(line[0], 16)
        # NOTE: non-BMP code points (> 65535) are deliberately kept; an earlier
        # variant of this script skipped them here.
        # startswith() is safe even when the category field is empty.
        if line[2].startswith('L'):
            isalpha.append(code)
        if line[12]:
            toupper.append((code, int(line[12], 16)))
        if line[13]:
            tolower.append((code, int(line[13], 16)))
17
def dumpalpha(codes=None):
    """Print the C tables ``ucd_alpha2`` and ``ucd_alpha1`` to stdout.

    Consecutive code points are collapsed into inclusive runs. Runs longer
    than one code point are printed as ``first,last,`` pairs in
    ``ucd_alpha2``; isolated code points are printed one per line in
    ``ucd_alpha1``.

    codes -- iterable of integer code points; runs are detected between
             neighbouring elements, so the sequence is expected to be in
             ascending order. Defaults to the module-level ``isalpha`` list.
    """
    if codes is None:
        codes = isalpha

    # ``None`` marks "no run open yet". Using 0 as the marker would lose a
    # run that legitimately starts at code point 0 and would emit a bogus
    # (0, 0) entry for an empty input.
    runs = []
    first = last = None
    for code in codes:
        if first is not None and code == last + 1:
            last = code
            continue
        if first is not None:
            runs.append((first, last))
        first = last = code
    if first is not None:
        runs.append((first, last))

    print("")
    print("static const int ucd_alpha2[] = {")
    for a, b in runs:
        if b > a:
            print(hex(a) + "," + hex(b) + ",")
    print("};")

    print("")
    print("static const int ucd_alpha1[] = {")
    for a, b in runs:
        if a == b:
            print(hex(a) + ",")
    print("};")
43
def dumpmap(name, input):
    """Print the C tables ``<name>2`` and ``<name>1`` to stdout.

    Neighbouring pairs whose source and target both advance by exactly one
    are collapsed into a run. Runs covering more than one source code point
    are printed in ``<name>2`` as ``first,last,offset,``; single mappings are
    printed in ``<name>1`` as ``code,offset,``. ``offset`` is the target
    minus the source at the start of the run, printed in decimal.

    name  -- prefix of the generated C array names.
    input -- iterable of (source, target) integer pairs. The parameter name
             shadows the builtin but is kept for interface compatibility.
    """
    # ``None`` marks "no run open yet". Using 0 as the marker would drop a
    # run starting at source 0, glue a leading (1, 1) pair onto a phantom
    # run starting at 0, and emit a bogus (0, 0, 0) row for an empty input.
    runs = []
    first_src = first_dst = None
    last_src = last_dst = None
    for src, dst in input:
        extends_run = (
            first_src is not None
            and src == last_src + 1
            and dst == last_dst + 1
        )
        if not extends_run:
            if first_src is not None:
                runs.append((first_src, last_src, first_dst))
            first_src, first_dst = src, dst
        last_src, last_dst = src, dst
    if first_src is not None:
        runs.append((first_src, last_src, first_dst))

    print("")
    print("static const int " + name + "2[] = {")
    for a, b, n in runs:
        if b > a:
            print(hex(a) + "," + hex(b) + "," + str(n - a) + ",")
    print("};")

    print("")
    print("static const int " + name + "1[] = {")
    for a, b, n in runs:
        if a == b:
            print(hex(a) + "," + str(n - a) + ",")
    print("};")
73
# Emit the generated header on stdout: banner first, then the alphabetic
# tables, then one pair of mapping tables per case-conversion direction.
print("/* This file was automatically created from UnicodeData.txt */")
dumpalpha()
for table_name, mapping in (("ucd_tolower", tolower), ("ucd_toupper", toupper)):
    dumpmap(table_name, mapping)