Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/source/pdf/pdf-cmap-load.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 // Copyright (C) 2004-2024 Artifex Software, Inc. | |
| 2 // | |
| 3 // This file is part of MuPDF. | |
| 4 // | |
| 5 // MuPDF is free software: you can redistribute it and/or modify it under the | |
| 6 // terms of the GNU Affero General Public License as published by the Free | |
| 7 // Software Foundation, either version 3 of the License, or (at your option) | |
| 8 // any later version. | |
| 9 // | |
| 10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY | |
| 11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
| 12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more | |
| 13 // details. | |
| 14 // | |
| 15 // You should have received a copy of the GNU Affero General Public License | |
| 16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> | |
| 17 // | |
| 18 // Alternative licensing terms are available from the licensor. | |
| 19 // For commercial licensing, see <https://www.artifex.com/> or contact | |
| 20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, | |
| 21 // CA 94129, USA, for further information. | |
| 22 | |
| 23 #include "mupdf/fitz.h" | |
| 24 #include "mupdf/pdf.h" | |
| 25 | |
| 26 #include "cmaps/TrueType-UCS2.h" | |
| 27 | |
| 28 #include <string.h> | |
| 29 | |
| 30 static pdf_cmap * | |
| 31 pdf_load_embedded_cmap_imp(fz_context *ctx, pdf_document *doc, pdf_obj *stmobj, pdf_cycle_list *cycle_up) | |
| 32 { | |
| 33 pdf_cycle_list cycle; | |
| 34 fz_stream *file = NULL; | |
| 35 pdf_cmap *cmap = NULL; | |
| 36 pdf_cmap *usecmap = NULL; | |
| 37 pdf_obj *obj; | |
| 38 | |
| 39 fz_var(file); | |
| 40 fz_var(cmap); | |
| 41 fz_var(usecmap); | |
| 42 | |
| 43 if ((cmap = pdf_find_item(ctx, pdf_drop_cmap_imp, stmobj)) != NULL) | |
| 44 return cmap; | |
| 45 | |
| 46 fz_try(ctx) | |
| 47 { | |
| 48 file = pdf_open_stream(ctx, stmobj); | |
| 49 cmap = pdf_load_cmap(ctx, file); | |
| 50 | |
| 51 obj = pdf_dict_get(ctx, stmobj, PDF_NAME(WMode)); | |
| 52 if (pdf_is_int(ctx, obj)) | |
| 53 pdf_set_cmap_wmode(ctx, cmap, pdf_to_int(ctx, obj)); | |
| 54 | |
| 55 obj = pdf_dict_get(ctx, stmobj, PDF_NAME(UseCMap)); | |
| 56 if (pdf_is_name(ctx, obj)) | |
| 57 { | |
| 58 fz_try(ctx) | |
| 59 { | |
| 60 usecmap = pdf_load_system_cmap(ctx, pdf_to_name(ctx, obj)); | |
| 61 pdf_set_usecmap(ctx, cmap, usecmap); | |
| 62 } | |
| 63 fz_catch(ctx) | |
| 64 { | |
| 65 fz_rethrow_if(ctx, FZ_ERROR_SYSTEM); | |
| 66 fz_report_error(ctx); | |
| 67 fz_warn(ctx, "cannot load system CMap: %s", pdf_to_name(ctx, obj)); | |
| 68 } | |
| 69 } | |
| 70 else if (pdf_is_indirect(ctx, obj)) | |
| 71 { | |
| 72 if (pdf_cycle(ctx, &cycle, cycle_up, obj)) | |
| 73 fz_throw(ctx, FZ_ERROR_FORMAT, "recursive CMap"); | |
| 74 usecmap = pdf_load_embedded_cmap_imp(ctx, doc, obj, &cycle); | |
| 75 pdf_set_usecmap(ctx, cmap, usecmap); | |
| 76 } | |
| 77 else if (strlen(cmap->usecmap_name) > 0) | |
| 78 { | |
| 79 fz_try(ctx) | |
| 80 { | |
| 81 usecmap = pdf_load_system_cmap(ctx, cmap->usecmap_name); | |
| 82 pdf_set_usecmap(ctx, cmap, usecmap); | |
| 83 } | |
| 84 fz_catch(ctx) | |
| 85 { | |
| 86 fz_rethrow_if(ctx, FZ_ERROR_SYSTEM); | |
| 87 fz_report_error(ctx); | |
| 88 fz_warn(ctx, "cannot load system CMap: %s", pdf_to_name(ctx, obj)); | |
| 89 } | |
| 90 } | |
| 91 | |
| 92 pdf_store_item(ctx, stmobj, cmap, pdf_cmap_size(ctx, cmap)); | |
| 93 } | |
| 94 fz_always(ctx) | |
| 95 { | |
| 96 fz_drop_stream(ctx, file); | |
| 97 pdf_drop_cmap(ctx, usecmap); | |
| 98 } | |
| 99 fz_catch(ctx) | |
| 100 { | |
| 101 pdf_drop_cmap(ctx, cmap); | |
| 102 fz_rethrow(ctx); | |
| 103 } | |
| 104 | |
| 105 return cmap; | |
| 106 } | |
| 107 | |
| 108 pdf_cmap * | |
| 109 pdf_load_embedded_cmap(fz_context *ctx, pdf_document *doc, pdf_obj *stmobj) | |
| 110 { | |
| 111 return pdf_load_embedded_cmap_imp(ctx, doc, stmobj, NULL); | |
| 112 } | |
| 113 | |
| 114 pdf_cmap * | |
| 115 pdf_new_identity_cmap(fz_context *ctx, int wmode, int bytes) | |
| 116 { | |
| 117 pdf_cmap *cmap = pdf_new_cmap(ctx); | |
| 118 fz_try(ctx) | |
| 119 { | |
| 120 unsigned int high = (1 << (bytes * 8)) - 1; | |
| 121 if (wmode) | |
| 122 fz_strlcpy(cmap->cmap_name, "Identity-V", sizeof cmap->cmap_name); | |
| 123 else | |
| 124 fz_strlcpy(cmap->cmap_name, "Identity-H", sizeof cmap->cmap_name); | |
| 125 pdf_add_codespace(ctx, cmap, 0, high, bytes); | |
| 126 pdf_map_range_to_range(ctx, cmap, 0, high, 0); | |
| 127 pdf_sort_cmap(ctx, cmap); | |
| 128 pdf_set_cmap_wmode(ctx, cmap, wmode); | |
| 129 } | |
| 130 fz_catch(ctx) | |
| 131 { | |
| 132 pdf_drop_cmap(ctx, cmap); | |
| 133 fz_rethrow(ctx); | |
| 134 } | |
| 135 return cmap; | |
| 136 } | |
| 137 | |
| 138 #ifdef NO_CJK | |
| 139 | |
| 140 pdf_cmap * | |
| 141 pdf_load_builtin_cmap(fz_context *ctx, const char *name) | |
| 142 { | |
| 143 if (!strcmp(name, "Identity-H")) return pdf_new_identity_cmap(ctx, 0, 2); | |
| 144 if (!strcmp(name, "Identity-V")) return pdf_new_identity_cmap(ctx, 1, 2); | |
| 145 if (!strcmp(name, "TrueType-UCS2")) return &cmap_TrueType_UCS2; | |
| 146 return NULL; | |
| 147 } | |
| 148 | |
| 149 #else | |
| 150 | |
| 151 /* To regenerate this list: :r !bash scripts/runcmapdump.sh */ | |
| 152 | |
| 153 #include "cmaps/83pv-RKSJ-H.h" | |
| 154 #include "cmaps/90ms-RKSJ-H.h" | |
| 155 #include "cmaps/90ms-RKSJ-V.h" | |
| 156 #include "cmaps/90msp-RKSJ-H.h" | |
| 157 #include "cmaps/90msp-RKSJ-V.h" | |
| 158 #include "cmaps/90pv-RKSJ-H.h" | |
| 159 #include "cmaps/Add-RKSJ-H.h" | |
| 160 #include "cmaps/Add-RKSJ-V.h" | |
| 161 #include "cmaps/Adobe-CNS1-UCS2.h" | |
| 162 #include "cmaps/Adobe-GB1-UCS2.h" | |
| 163 #include "cmaps/Adobe-Japan1-UCS2.h" | |
| 164 #include "cmaps/Adobe-Korea1-UCS2.h" | |
| 165 #include "cmaps/B5pc-H.h" | |
| 166 #include "cmaps/B5pc-V.h" | |
| 167 #include "cmaps/CNS-EUC-H.h" | |
| 168 #include "cmaps/CNS-EUC-V.h" | |
| 169 #include "cmaps/ETen-B5-H.h" | |
| 170 #include "cmaps/ETen-B5-V.h" | |
| 171 #include "cmaps/ETenms-B5-H.h" | |
| 172 #include "cmaps/ETenms-B5-V.h" | |
| 173 #include "cmaps/EUC-H.h" | |
| 174 #include "cmaps/EUC-V.h" | |
| 175 #include "cmaps/Ext-RKSJ-H.h" | |
| 176 #include "cmaps/Ext-RKSJ-V.h" | |
| 177 #include "cmaps/GB-EUC-H.h" | |
| 178 #include "cmaps/GB-EUC-V.h" | |
| 179 #include "cmaps/GBK-EUC-H.h" | |
| 180 #include "cmaps/GBK-EUC-V.h" | |
| 181 #include "cmaps/GBK-X.h" | |
| 182 #include "cmaps/GBK2K-H.h" | |
| 183 #include "cmaps/GBK2K-V.h" | |
| 184 #include "cmaps/GBKp-EUC-H.h" | |
| 185 #include "cmaps/GBKp-EUC-V.h" | |
| 186 #include "cmaps/GBpc-EUC-H.h" | |
| 187 #include "cmaps/GBpc-EUC-V.h" | |
| 188 #include "cmaps/H.h" | |
| 189 #include "cmaps/HKscs-B5-H.h" | |
| 190 #include "cmaps/HKscs-B5-V.h" | |
| 191 #include "cmaps/Identity-H.h" | |
| 192 #include "cmaps/Identity-V.h" | |
| 193 #include "cmaps/KSC-EUC-H.h" | |
| 194 #include "cmaps/KSC-EUC-V.h" | |
| 195 #include "cmaps/KSCms-UHC-H.h" | |
| 196 #include "cmaps/KSCms-UHC-HW-H.h" | |
| 197 #include "cmaps/KSCms-UHC-HW-V.h" | |
| 198 #include "cmaps/KSCms-UHC-V.h" | |
| 199 #include "cmaps/KSCpc-EUC-H.h" | |
| 200 #include "cmaps/UniCNS-UCS2-H.h" | |
| 201 #include "cmaps/UniCNS-UCS2-V.h" | |
| 202 #include "cmaps/UniCNS-UTF16-H.h" | |
| 203 #include "cmaps/UniCNS-UTF16-V.h" | |
| 204 #include "cmaps/UniCNS-X.h" | |
| 205 #include "cmaps/UniGB-UCS2-H.h" | |
| 206 #include "cmaps/UniGB-UCS2-V.h" | |
| 207 #include "cmaps/UniGB-UTF16-H.h" | |
| 208 #include "cmaps/UniGB-UTF16-V.h" | |
| 209 #include "cmaps/UniGB-X.h" | |
| 210 #include "cmaps/UniJIS-UCS2-H.h" | |
| 211 #include "cmaps/UniJIS-UCS2-HW-H.h" | |
| 212 #include "cmaps/UniJIS-UCS2-HW-V.h" | |
| 213 #include "cmaps/UniJIS-UCS2-V.h" | |
| 214 #include "cmaps/UniJIS-UTF16-H.h" | |
| 215 #include "cmaps/UniJIS-UTF16-V.h" | |
| 216 #include "cmaps/UniJIS-X.h" | |
| 217 #include "cmaps/UniKS-UCS2-H.h" | |
| 218 #include "cmaps/UniKS-UCS2-V.h" | |
| 219 #include "cmaps/UniKS-UTF16-H.h" | |
| 220 #include "cmaps/UniKS-UTF16-V.h" | |
| 221 #include "cmaps/UniKS-X.h" | |
| 222 #include "cmaps/V.h" | |
| 223 | |
/*
	Compiled-in CMaps available to pdf_load_builtin_cmap().

	The lookup is a binary search that compares names with strcmp(), so
	the entries must stay ordered by their cmap_name in strcmp() order.
*/
static pdf_cmap *table[] = {
	&cmap_83pv_RKSJ_H,
	&cmap_90ms_RKSJ_H,
	&cmap_90ms_RKSJ_V,
	&cmap_90msp_RKSJ_H,
	&cmap_90msp_RKSJ_V,
	&cmap_90pv_RKSJ_H,
	&cmap_Add_RKSJ_H,
	&cmap_Add_RKSJ_V,
	&cmap_Adobe_CNS1_UCS2,
	&cmap_Adobe_GB1_UCS2,
	&cmap_Adobe_Japan1_UCS2,
	&cmap_Adobe_Korea1_UCS2,
	&cmap_B5pc_H,
	&cmap_B5pc_V,
	&cmap_CNS_EUC_H,
	&cmap_CNS_EUC_V,
	&cmap_ETen_B5_H,
	&cmap_ETen_B5_V,
	&cmap_ETenms_B5_H,
	&cmap_ETenms_B5_V,
	&cmap_EUC_H,
	&cmap_EUC_V,
	&cmap_Ext_RKSJ_H,
	&cmap_Ext_RKSJ_V,
	&cmap_GB_EUC_H,
	&cmap_GB_EUC_V,
	&cmap_GBK_EUC_H,
	&cmap_GBK_EUC_V,
	&cmap_GBK_X,
	&cmap_GBK2K_H,
	&cmap_GBK2K_V,
	&cmap_GBKp_EUC_H,
	&cmap_GBKp_EUC_V,
	&cmap_GBpc_EUC_H,
	&cmap_GBpc_EUC_V,
	&cmap_H,
	&cmap_HKscs_B5_H,
	&cmap_HKscs_B5_V,
	&cmap_Identity_H,
	&cmap_Identity_V,
	&cmap_KSC_EUC_H,
	&cmap_KSC_EUC_V,
	&cmap_KSCms_UHC_H,
	&cmap_KSCms_UHC_HW_H,
	&cmap_KSCms_UHC_HW_V,
	&cmap_KSCms_UHC_V,
	&cmap_KSCpc_EUC_H,
	&cmap_TrueType_UCS2,
	&cmap_UniCNS_UCS2_H,
	&cmap_UniCNS_UCS2_V,
	&cmap_UniCNS_UTF16_H,
	&cmap_UniCNS_UTF16_V,
	&cmap_UniCNS_X,
	&cmap_UniGB_UCS2_H,
	&cmap_UniGB_UCS2_V,
	&cmap_UniGB_UTF16_H,
	&cmap_UniGB_UTF16_V,
	&cmap_UniGB_X,
	&cmap_UniJIS_UCS2_H,
	&cmap_UniJIS_UCS2_HW_H,
	&cmap_UniJIS_UCS2_HW_V,
	&cmap_UniJIS_UCS2_V,
	&cmap_UniJIS_UTF16_H,
	&cmap_UniJIS_UTF16_V,
	&cmap_UniJIS_X,
	&cmap_UniKS_UCS2_H,
	&cmap_UniKS_UCS2_V,
	&cmap_UniKS_UTF16_H,
	&cmap_UniKS_UTF16_V,
	&cmap_UniKS_X,
	&cmap_V,
};
| 297 | |
| 298 pdf_cmap * | |
| 299 pdf_load_builtin_cmap(fz_context *ctx, const char *name) | |
| 300 { | |
| 301 int r = nelem(table)-1; | |
| 302 int l = 0; | |
| 303 while (l <= r) | |
| 304 { | |
| 305 int m = (l + r) >> 1; | |
| 306 int c = strcmp(name, table[m]->cmap_name); | |
| 307 if (c < 0) | |
| 308 r = m - 1; | |
| 309 else if (c > 0) | |
| 310 l = m + 1; | |
| 311 else | |
| 312 return table[m]; | |
| 313 } | |
| 314 return NULL; | |
| 315 } | |
| 316 | |
| 317 #endif | |
| 318 | |
| 319 pdf_cmap * | |
| 320 pdf_load_system_cmap(fz_context *ctx, const char *cmap_name) | |
| 321 { | |
| 322 pdf_cmap *usecmap; | |
| 323 pdf_cmap *cmap; | |
| 324 | |
| 325 cmap = pdf_load_builtin_cmap(ctx, cmap_name); | |
| 326 if (!cmap) | |
| 327 fz_throw(ctx, FZ_ERROR_FORMAT, "no builtin cmap file: %s", cmap_name); | |
| 328 | |
| 329 if (cmap->usecmap_name[0] && !cmap->usecmap) | |
| 330 { | |
| 331 usecmap = pdf_load_system_cmap(ctx, cmap->usecmap_name); | |
| 332 if (!usecmap) | |
| 333 fz_throw(ctx, FZ_ERROR_FORMAT, "no builtin cmap file: %s", cmap->usecmap_name); | |
| 334 pdf_set_usecmap(ctx, cmap, usecmap); | |
| 335 } | |
| 336 | |
| 337 return cmap; | |
| 338 } |
