comparison mupdf-source/scripts/makesubset.py @ 3:2c135c81b16c

MERGE: upstream PyMuPDF 1.26.4 with MuPDF 1.26.7
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:44:09 +0200
parents b50eed0cc0ef
children
comparison
equal deleted inserted replaced
0:6015a75abc2d 3:2c135c81b16c
1 #!/usr/bin/env python3
2
3 # Convert MES-2 (or WGL4) character set to list of glyphs for font subsetting.
4 # Also add small-caps glyph names for small letters and ligatures.
5
6 import sys
7
# Map each code point found in the glyph list to every glyph name assigned to
# it, e.g. {0x0041: ['A'], ...}.  Each data line has the form "name;HEX".
glyphs = {}
with open("scripts/glyphlist.txt") as glyphlist:
    for line in glyphlist:
        line = line.rstrip()
        # Skip blank lines and '#' comment lines.
        if not line or line[0] == '#':
            continue
        n, u = line.split(';')
        # Only entries whose code field is exactly four hex digits are kept;
        # anything else (such as multi-code-point sequences) is ignored.
        if len(u) != 4:
            continue
        u = int(u, base=16)
        if 0x0000 <= u <= 0x001F:
            continue  # control block 1
        if 0x007F <= u <= 0x009F:
            continue  # control block 2
        if 0x2500 <= u <= 0x25FF:
            continue  # Box Drawing, Block Elements, Geometric Shapes
        glyphs.setdefault(u, []).append(n)
21
22 # Ligatures are mapped to 'fi' and 'fl'; we also want them using the 'f_i' convention.
23
# Small-caps switch: when true, the loaders also emit '.sc' glyph names.
# Turned on by the '-sc' command line argument.
do_small = False

# Glyph name -> code point, filled in by load_table() and load_ligs().
table = {}
26
def load_table(fn):
    """Add glyph names for every code point listed in the file *fn*.

    Each non-blank line of *fn* must begin with a hexadecimal code point.
    The whole line is also searched for 'SMALL LETTER' / 'SMALL LIGATURE'
    to decide whether the character counts as "small".

    For each code point:
      * every name recorded for it in ``glyphs`` is stored in ``table``;
      * code points >= 128 additionally get a 'uniXXXX' name;
      * when ``do_small`` is set and the character is small, a '.sc'
        variant of each of those names is stored as well.

    Raises ValueError if a non-blank line does not start with a hex number.
    """
    with open(fn) as charset:
        for line in charset:
            fields = line.split()
            if not fields:
                # Blank line: nothing to parse.
                continue
            is_small = ('SMALL LETTER' in line) or ('SMALL LIGATURE' in line)
            want_sc = do_small and is_small
            u = int(fields[0], 16)
            for n in glyphs.get(u, ()):
                table[n] = u
                if want_sc:
                    table[n + '.sc'] = u
            # ASCII code points are only addressed by their glyph names.
            if u >= 128:
                table['uni%04X' % u] = u
                if want_sc:
                    table['uni%04X.sc' % u] = u
40
def load_ligs():
    """Register the f-ligatures U+FB00..U+FB04 in ``table``.

    The plain names ('ff', 'fi', 'fl', 'ffi', 'ffl') are always stored.
    When ``do_small`` is set, the underscore-separated small-caps names
    ('f_f.sc', 'f_i.sc', ...) are stored for the same code points too.
    """
    ligatures = (
        ('ff', 'f_f.sc', 0xFB00),
        ('fi', 'f_i.sc', 0xFB01),
        ('fl', 'f_l.sc', 0xFB02),
        ('ffi', 'f_f_i.sc', 0xFB03),
        ('ffl', 'f_f_l.sc', 0xFB04),
    )
    for plain_name, _, code in ligatures:
        table[plain_name] = code
    if not do_small:
        return
    for _, small_caps_name, code in ligatures:
        table[small_caps_name] = code
53
# Command line driver.  Arguments are processed strictly in order:
#   -sc      emit '.sc' names for everything loaded after this point
#   -lig     add the f-ligature names (see load_ligs)
#   -scdump  print the small-caps names as C initializer rows instead of
#            the comma-separated glyph list
#   <file>   character-set table to load (see load_table)
if len(sys.argv) < 2:
    print('usage: python3 scripts/makesubset.py scripts/MES-2.TXT', file=sys.stderr)
else:
    dump_small_caps = False
    for arg in sys.argv[1:]:
        if arg == '-sc':
            do_small = True
        elif arg == '-lig':
            load_ligs()
        elif arg == '-scdump':
            # Output-mode flag, not a table file: it must not reach
            # load_table(), which would try to open a file of that name.
            dump_small_caps = True
        else:
            load_table(arg)

    if dump_small_caps:
        # One '{0xXXXX, "name.sc"},' row per named small-caps glyph; the
        # 'uniXXXX.sc' aliases are left out.
        smcp = sorted(
            '{0x%04X, "%s"},' % (u, n)
            for n, u in table.items()
            if u > 0 and n.endswith('.sc') and not n.startswith('uni')
        )
        print('\n\t'.join(smcp))
    else:
        # Comma-separated, sorted list of all glyph names.
        print(','.join(sorted(table)))