Mercurial > hgrepos > Python2 > PyMuPDF
view mupdf-source/scripts/cmapshare.py @ 40:aa33339d6b8a upstream
ADD: MuPDF v1.26.10: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.5.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Sat, 11 Oct 2025 11:31:38 +0200 |
| parents | b50eed0cc0ef |
| children |
line wrap: on
line source
#!/usr/bin/env python3

# Find and extract common CMap subsets.
# Takes flattened CMaps as input, using only the 'cidchar' sections.
# The outputs are truncated; so use 'cmapflatten.py' to clean them up.
#
# Usage: cmapshare.py COMMON-OUTPUT CMAP [CMAP ...]
#
# Writes the entries shared by every input CMap to COMMON-OUTPUT, and for
# each input CMap writes "<CMAP>.shared" holding the remaining entries plus
# a 'usecmap' reference to the common file.

import os
import sys


def load_cmap_set(filename):
    """Return the set of 'cidchar' entry lines found in *filename*.

    Every line is stripped of surrounding whitespace.  Collection starts
    after a line ending in 'begincidchar' and stops at the next line ending
    in 'endcidchar'; lines outside such sections are ignored.
    """
    cmap = set()
    active = False
    with open(filename) as src:
        for line in src:
            line = line.strip()
            # Order matters: the 'endcidchar' line must switch collection
            # off before it could be added, and the 'begincidchar' line
            # must switch it on only after the membership step.
            if line.endswith("endcidchar"):
                active = False
            if active:
                cmap.add(line)
            if line.endswith("begincidchar"):
                active = True
    return cmap


def load_cmap_prologue(filename):
    """Return the stripped lines of *filename* that precede the first line
    ending in 'begincidchar' (all lines if there is no such line).
    """
    prologue = []
    with open(filename) as src:
        for line in src:
            line = line.strip()
            if line.endswith("begincidchar"):
                break
            prologue.append(line)
    return prologue


# Lines written after the entries of every output file.
epilogue = [
    'endcidchar',
]


def print_cmap(filename, prologue, cmap, common_name=None):
    """Write a (truncated) CMap to *filename*.

    filename    -- path of the file to create or overwrite.
    prologue    -- iterable of lines to emit first.  Lines ending in
                   'usecmap' are dropped.
    cmap        -- collection of entry lines; written in sorted order after
                   a '<count> begincidchar' line.
    common_name -- if given, '/<common_name> usecmap' is written right after
                   a prologue line equal to 'begincmap'.  If None, no
                   'usecmap' line is written.

    The lines of the module-level 'epilogue' list are written last.
    """
    with open(filename, "w") as out:
        for line in prologue:
            if not line.endswith("usecmap"):
                print(line, file=out)
            if line == 'begincmap' and common_name is not None:
                print("/" + common_name, "usecmap", file=out)
        print(len(cmap), "begincidchar", file=out)
        for line in sorted(cmap):
            print(line, file=out)
        for line in epilogue:
            print(line, file=out)


def main(argv=None):
    """Run the tool with *argv* (defaults to sys.argv).

    argv[1] is the output path for the common subset; argv[2:] are the input
    CMap files.  Returns 0 on success, or 1 after printing a usage message
    to stderr when fewer than one output and one input path are given.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 3:
        prog = os.path.basename(argv[0]) if argv else "cmapshare.py"
        print("usage: %s COMMON-OUTPUT CMAP [CMAP ...]" % prog,
              file=sys.stderr)
        return 1

    common_path = argv[1]
    inputs = argv[2:]
    common_name = os.path.basename(common_path)

    # Parse every input once; the sets are reused for both the intersection
    # and the per-file differences below.
    cmaps = [load_cmap_set(f) for f in inputs]

    # First find the common subset
    common = set.intersection(*cmaps)

    # Print common subset.  Its one-line prologue has no 'begincmap' line,
    # so no 'usecmap' reference is written into the common file.
    print_cmap(common_path, ["/CMapName /%s" % common_name], common)

    # Now find unique bits
    for f, cmap in zip(inputs, cmaps):
        print_cmap(f + ".shared", load_cmap_prologue(f), cmap - common,
                   common_name)
    return 0


if __name__ == "__main__":
    sys.exit(main())
