Mercurial > hgrepos > Python2 > PyMuPDF
view mupdf-source/scripts/cmapshare.py @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children | |
line wrap: on
line source
#!/usr/bin/env python3

# Find and extract common CMap subsets.
# Takes flattened CMaps as input, using only the 'cidchar' sections.
# The outputs are truncated; so use 'cmapflatten.py' to clean them up.

import os
import sys


def load_cmap_set(filename):
    """Return the set of stripped lines inside the 'cidchar' sections.

    A section starts after a line ending in "begincidchar" and stops at a
    line ending in "endcidchar". The markers are matched by suffix, so a
    leading entry count (e.g. "100 begincidchar") is accepted.
    """
    cmap = set()
    active = False
    with open(filename) as src:
        for line in src:
            line = line.strip()
            # The end marker is tested before collecting and the begin
            # marker after, so neither marker line lands in the set.
            if line.endswith("endcidchar"):
                active = False
            if active:
                cmap.add(line)
            if line.endswith("begincidchar"):
                active = True
    return cmap


def load_cmap_prologue(filename):
    """Return the stripped lines that precede the first 'begincidchar' line.

    If the file has no such line, every line of the file is returned.
    """
    prologue = []
    with open(filename) as src:
        for line in src:
            line = line.strip()
            if line.endswith("begincidchar"):
                break
            prologue.append(line)
    return prologue


# Lines written after the cidchar entries of every generated file.
epilogue = [
    'endcidchar',
]


def print_cmap(filename, prologue, cmap, common_name=None):
    """Write a CMap made of `prologue`, the sorted `cmap` lines and the epilogue.

    Prologue lines ending in "usecmap" are dropped, and a reference to the
    shared CMap ("/<common_name> usecmap") is emitted right after a
    'begincmap' line. The cidchar section header carries the total number
    of entries.

    filename    -- path of the file to create (overwritten if it exists).
    prologue    -- iterable of already-stripped header lines.
    cmap        -- collection of cidchar entry lines.
    common_name -- name of the shared CMap; defaults to the basename of
                   sys.argv[1], which is what the command line tool uses.
    """
    if common_name is None:
        common_name = os.path.basename(sys.argv[1])
    # The context manager guarantees the output is flushed and closed.
    with open(filename, "w") as out:
        for line in prologue:
            if not line.endswith("usecmap"):
                print(line, file=out)
            if line == 'begincmap':
                print("/" + common_name, "usecmap", file=out)
        print(len(cmap), "begincidchar", file=out)
        for line in sorted(cmap):
            print(line, file=out)
        for line in epilogue:
            print(line, file=out)


def main(argv=None):
    """Split the input CMaps into a shared subset and per-file remainders.

    argv[1] is the output path of the shared CMap (its basename is used as
    the CMap name); argv[2:] are the flattened input CMaps. For each input
    a "<input>.shared" file is written that holds the entries not present
    in the shared subset.

    Returns the process exit status: 0 on success, 1 on a usage error.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 3:
        print("usage: cmapshare.py common-cmap input-cmap [input-cmap ...]",
              file=sys.stderr)
        return 1

    common_path = argv[1]
    inputs = argv[2:]
    common_name = os.path.basename(common_path)

    # First find the common subset
    common = load_cmap_set(inputs[0])
    for path in inputs[1:]:
        common &= load_cmap_set(path)

    # Print common subset
    print_cmap(common_path, ["/CMapName /%s" % common_name], common,
               common_name)

    # Now find unique bits
    for path in inputs:
        unique = load_cmap_set(path) - common
        prologue = load_cmap_prologue(path)
        print_cmap(path + ".shared", prologue, unique, common_name)
    return 0


if __name__ == "__main__":
    sys.exit(main())
