Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/source/pdf/pdf-unicode.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 // Copyright (C) 2004-2021 Artifex Software, Inc. | |
| 2 // | |
| 3 // This file is part of MuPDF. | |
| 4 // | |
| 5 // MuPDF is free software: you can redistribute it and/or modify it under the | |
| 6 // terms of the GNU Affero General Public License as published by the Free | |
| 7 // Software Foundation, either version 3 of the License, or (at your option) | |
| 8 // any later version. | |
| 9 // | |
| 10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY | |
| 11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
| 12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more | |
| 13 // details. | |
| 14 // | |
| 15 // You should have received a copy of the GNU Affero General Public License | |
| 16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> | |
| 17 // | |
| 18 // Alternative licensing terms are available from the licensor. | |
| 19 // For commercial licensing, see <https://www.artifex.com/> or contact | |
| 20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, | |
| 21 // CA 94129, USA, for further information. | |
| 22 | |
| 23 #include "mupdf/fitz.h" | |
| 24 #include "mupdf/pdf.h" | |
| 25 | |
| 26 #include <string.h> | |
| 27 | |
| 28 /* Load or synthesize ToUnicode map for fonts */ | |
| 29 | |
| 30 static void | |
| 31 pdf_remap_cmap_range(fz_context *ctx, pdf_cmap *ucs_from_gid, | |
| 32 unsigned int cpt, unsigned int gid, unsigned int n, pdf_cmap *ucs_from_cpt) | |
| 33 { | |
| 34 unsigned int k; | |
| 35 int ucsbuf[PDF_MRANGE_CAP]; | |
| 36 int ucslen; | |
| 37 | |
| 38 for (k = 0; k <= n; ++k) | |
| 39 { | |
| 40 ucslen = pdf_lookup_cmap_full(ucs_from_cpt, cpt + k, ucsbuf); | |
| 41 if (ucslen == 1) | |
| 42 pdf_map_range_to_range(ctx, ucs_from_gid, gid + k, gid + k, ucsbuf[0]); | |
| 43 else if (ucslen > 1) | |
| 44 pdf_map_one_to_many(ctx, ucs_from_gid, gid + k, ucsbuf, ucslen); | |
| 45 } | |
| 46 } | |
| 47 | |
| 48 static pdf_cmap * | |
| 49 pdf_remap_cmap(fz_context *ctx, pdf_cmap *gid_from_cpt, pdf_cmap *ucs_from_cpt) | |
| 50 { | |
| 51 pdf_cmap *ucs_from_gid; | |
| 52 unsigned int a, b, x; | |
| 53 int i; | |
| 54 | |
| 55 ucs_from_gid = pdf_new_cmap(ctx); | |
| 56 | |
| 57 fz_try(ctx) | |
| 58 { | |
| 59 if (gid_from_cpt->usecmap) | |
| 60 ucs_from_gid->usecmap = pdf_remap_cmap(ctx, gid_from_cpt->usecmap, ucs_from_cpt); | |
| 61 | |
| 62 pdf_add_codespace(ctx, ucs_from_gid, 0, 0x7fffffff, 4); | |
| 63 | |
| 64 for (i = 0; i < gid_from_cpt->rlen; ++i) | |
| 65 { | |
| 66 a = gid_from_cpt->ranges[i].low; | |
| 67 b = gid_from_cpt->ranges[i].high; | |
| 68 x = gid_from_cpt->ranges[i].out; | |
| 69 pdf_remap_cmap_range(ctx, ucs_from_gid, a, x, b - a, ucs_from_cpt); | |
| 70 } | |
| 71 | |
| 72 for (i = 0; i < gid_from_cpt->xlen; ++i) | |
| 73 { | |
| 74 a = gid_from_cpt->xranges[i].low; | |
| 75 b = gid_from_cpt->xranges[i].high; | |
| 76 x = gid_from_cpt->xranges[i].out; | |
| 77 pdf_remap_cmap_range(ctx, ucs_from_gid, a, x, b - a, ucs_from_cpt); | |
| 78 } | |
| 79 | |
| 80 /* Font encoding CMaps don't have one-to-many mappings, so we can ignore the mranges. */ | |
| 81 | |
| 82 pdf_sort_cmap(ctx, ucs_from_gid); | |
| 83 } | |
| 84 fz_catch(ctx) | |
| 85 { | |
| 86 pdf_drop_cmap(ctx, ucs_from_gid); | |
| 87 fz_rethrow(ctx); | |
| 88 } | |
| 89 | |
| 90 return ucs_from_gid; | |
| 91 } | |
| 92 | |
| 93 void | |
| 94 pdf_load_to_unicode(fz_context *ctx, pdf_document *doc, pdf_font_desc *font, | |
| 95 const char **strings, char *collection, pdf_obj *cmapstm) | |
| 96 { | |
| 97 unsigned int cpt; | |
| 98 | |
| 99 if (pdf_is_stream(ctx, cmapstm)) | |
| 100 { | |
| 101 pdf_cmap *ucs_from_cpt = pdf_load_embedded_cmap(ctx, doc, cmapstm); | |
| 102 fz_try(ctx) | |
| 103 font->to_unicode = pdf_remap_cmap(ctx, font->encoding, ucs_from_cpt); | |
| 104 fz_always(ctx) | |
| 105 pdf_drop_cmap(ctx, ucs_from_cpt); | |
| 106 fz_catch(ctx) | |
| 107 fz_rethrow(ctx); | |
| 108 font->size += pdf_cmap_size(ctx, font->to_unicode); | |
| 109 } | |
| 110 | |
| 111 else if (pdf_is_name(ctx, cmapstm)) | |
| 112 { | |
| 113 pdf_cmap *ucs_from_cpt = pdf_load_system_cmap(ctx, pdf_to_name(ctx, cmapstm)); | |
| 114 fz_try(ctx) | |
| 115 font->to_unicode = pdf_remap_cmap(ctx, font->encoding, ucs_from_cpt); | |
| 116 fz_always(ctx) | |
| 117 pdf_drop_cmap(ctx, ucs_from_cpt); | |
| 118 fz_catch(ctx) | |
| 119 fz_rethrow(ctx); | |
| 120 font->size += pdf_cmap_size(ctx, font->to_unicode); | |
| 121 } | |
| 122 | |
| 123 else if (collection) | |
| 124 { | |
| 125 if (!strcmp(collection, "Adobe-CNS1")) | |
| 126 font->to_unicode = pdf_load_system_cmap(ctx, "Adobe-CNS1-UCS2"); | |
| 127 else if (!strcmp(collection, "Adobe-GB1")) | |
| 128 font->to_unicode = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2"); | |
| 129 else if (!strcmp(collection, "Adobe-Japan1")) | |
| 130 font->to_unicode = pdf_load_system_cmap(ctx, "Adobe-Japan1-UCS2"); | |
| 131 else if (!strcmp(collection, "Adobe-Korea1")) | |
| 132 font->to_unicode = pdf_load_system_cmap(ctx, "Adobe-Korea1-UCS2"); | |
| 133 } | |
| 134 | |
| 135 if (strings) | |
| 136 { | |
| 137 /* TODO one-to-many mappings */ | |
| 138 | |
| 139 font->cid_to_ucs = Memento_label(fz_malloc_array(ctx, 256, unsigned short), "cid_to_ucs"); | |
| 140 font->cid_to_ucs_len = 256; | |
| 141 font->size += 256 * sizeof *font->cid_to_ucs; | |
| 142 | |
| 143 for (cpt = 0; cpt < 256; cpt++) | |
| 144 { | |
| 145 if (strings[cpt]) | |
| 146 font->cid_to_ucs[cpt] = fz_unicode_from_glyph_name(strings[cpt]); | |
| 147 else | |
| 148 font->cid_to_ucs[cpt] = FZ_REPLACEMENT_CHARACTER; | |
| 149 } | |
| 150 } | |
| 151 | |
| 152 if (!font->to_unicode && !font->cid_to_ucs) | |
| 153 { | |
| 154 /* TODO: synthesize a ToUnicode if it's a freetype font with | |
| 155 * cmap and/or post tables or if it has glyph names. */ | |
| 156 } | |
| 157 } |
