Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/scripts/makesubset.py @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children | |
line wrap: on
line diff
#!/usr/bin/env python3

# Convert MES-2 (or WGL4) character set to list of glyphs for font subsetting.
# Also add small-caps glyph names for small letters and ligatures.
#
# Usage:
#   python3 scripts/makesubset.py [-sc] [-scdump] [-lig] TABLE...
#
# Arguments are processed left to right, so '-sc' only affects the '-lig'
# and TABLE arguments that follow it.

import sys

# Code point -> list of glyph names, filled by load_glyphs().
glyphs = {}

# Glyph name -> code point, filled by load_table() and load_ligs().
table = {}

# When true, load_table() and load_ligs() also add '.sc' glyph names.
do_small = False


def load_glyphs(fn="scripts/glyphlist.txt"):
    """Read 'name;XXXX' lines from the glyph list *fn* into ``glyphs``.

    Blank lines and lines starting with '#' are skipped.  Only entries
    whose code field is exactly four characters long are used; entries in
    the two control ranges and in U+2500..U+25FF are dropped.
    """
    with open(fn) as f:
        for line in f:
            line = line.rstrip()
            if not line or line[0] == '#':
                continue
            n, u = line.split(';')
            if len(u) != 4:
                continue
            u = int(u, base=16)
            if 0x0000 <= u <= 0x001F:
                continue  # control block 1
            if 0x007F <= u <= 0x009F:
                continue  # control block 2
            if 0x2500 <= u <= 0x25FF:
                continue  # Box Drawing, Block Elements, Geometric Shapes
            glyphs.setdefault(u, []).append(n)


def load_table(fn):
    """Add the glyph names for every code point listed in file *fn*.

    The first whitespace-separated field of each line is parsed as a
    hexadecimal code point.  Blank lines and lines whose first field starts
    with '#' are skipped.  Every name known for the code point in ``glyphs``
    is added to ``table``; code points >= 128 also get a 'uniXXXX' name.
    When ``do_small`` is set and the line contains 'SMALL LETTER' or
    'SMALL LIGATURE', the matching '.sc' names are added as well.
    """
    with open(fn) as f:
        for line in f:
            fields = line.split()
            if not fields or fields[0].startswith('#'):
                continue
            is_small = ('SMALL LETTER' in line) or ('SMALL LIGATURE' in line)
            u = int(fields[0], 16)
            add_small = do_small and is_small
            for n in glyphs.get(u, ()):
                table[n] = u
                if add_small:
                    table[n + '.sc'] = u
            if u >= 128:
                table['uni%04X' % u] = u
                if add_small:
                    table['uni%04X.sc' % u] = u


def load_ligs():
    """Add the f-ligature glyph names U+FB00..U+FB04 to ``table``.

    Ligatures are mapped to 'fi' and 'fl'; we also want them using the
    'f_i' convention, which is used for the '.sc' names added when
    ``do_small`` is set.
    """
    table['ff'] = 0xFB00
    table['fi'] = 0xFB01
    table['fl'] = 0xFB02
    table['ffi'] = 0xFB03
    table['ffl'] = 0xFB04
    if do_small:
        table['f_f.sc'] = 0xFB00
        table['f_i.sc'] = 0xFB01
        table['f_l.sc'] = 0xFB02
        table['f_f_i.sc'] = 0xFB03
        table['f_f_l.sc'] = 0xFB04


def main(argv=None, glyphlist="scripts/glyphlist.txt"):
    """Run the command line in *argv* (defaults to ``sys.argv``).

    Prints either the sorted, comma-separated glyph names or, with
    '-scdump', one '{0xXXXX, "name"},' entry per small-caps glyph name.
    *glyphlist* is the glyph list file passed to load_glyphs().
    """
    global do_small

    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        print('usage: python3 scripts/makesubset.py scripts/MES-2.TXT', file=sys.stderr)
        return

    load_glyphs(glyphlist)

    sc_dump = False
    for arg in argv[1:]:
        if arg == '-sc':
            do_small = True
        elif arg == '-scdump':
            # Must be recognised here: otherwise it falls through to
            # load_table() and is opened as if it were a table file.
            sc_dump = True
        elif arg == '-lig':
            load_ligs()
        else:
            load_table(arg)

    if sc_dump:
        smcp = sorted(
            '{0x%04X, "%s"},' % (u, n)
            for n, u in table.items()
            if u > 0 and n.endswith('.sc') and not n.startswith('uni')
        )
        print('\n\t'.join(smcp))
    else:
        print(','.join(sorted(table)))


if __name__ == '__main__':
    main()
