comparison mupdf-source/source/pdf/pdf-cmap-load.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 // Copyright (C) 2004-2024 Artifex Software, Inc.
2 //
3 // This file is part of MuPDF.
4 //
5 // MuPDF is free software: you can redistribute it and/or modify it under the
6 // terms of the GNU Affero General Public License as published by the Free
7 // Software Foundation, either version 3 of the License, or (at your option)
8 // any later version.
9 //
10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13 // details.
14 //
15 // You should have received a copy of the GNU Affero General Public License
16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17 //
18 // Alternative licensing terms are available from the licensor.
19 // For commercial licensing, see <https://www.artifex.com/> or contact
20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21 // CA 94129, USA, for further information.
22
23 #include "mupdf/fitz.h"
24 #include "mupdf/pdf.h"
25
26 #include "cmaps/TrueType-UCS2.h"
27
28 #include <string.h>
29
30 static pdf_cmap *
31 pdf_load_embedded_cmap_imp(fz_context *ctx, pdf_document *doc, pdf_obj *stmobj, pdf_cycle_list *cycle_up)
32 {
33 pdf_cycle_list cycle;
34 fz_stream *file = NULL;
35 pdf_cmap *cmap = NULL;
36 pdf_cmap *usecmap = NULL;
37 pdf_obj *obj;
38
39 fz_var(file);
40 fz_var(cmap);
41 fz_var(usecmap);
42
43 if ((cmap = pdf_find_item(ctx, pdf_drop_cmap_imp, stmobj)) != NULL)
44 return cmap;
45
46 fz_try(ctx)
47 {
48 file = pdf_open_stream(ctx, stmobj);
49 cmap = pdf_load_cmap(ctx, file);
50
51 obj = pdf_dict_get(ctx, stmobj, PDF_NAME(WMode));
52 if (pdf_is_int(ctx, obj))
53 pdf_set_cmap_wmode(ctx, cmap, pdf_to_int(ctx, obj));
54
55 obj = pdf_dict_get(ctx, stmobj, PDF_NAME(UseCMap));
56 if (pdf_is_name(ctx, obj))
57 {
58 fz_try(ctx)
59 {
60 usecmap = pdf_load_system_cmap(ctx, pdf_to_name(ctx, obj));
61 pdf_set_usecmap(ctx, cmap, usecmap);
62 }
63 fz_catch(ctx)
64 {
65 fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
66 fz_report_error(ctx);
67 fz_warn(ctx, "cannot load system CMap: %s", pdf_to_name(ctx, obj));
68 }
69 }
70 else if (pdf_is_indirect(ctx, obj))
71 {
72 if (pdf_cycle(ctx, &cycle, cycle_up, obj))
73 fz_throw(ctx, FZ_ERROR_FORMAT, "recursive CMap");
74 usecmap = pdf_load_embedded_cmap_imp(ctx, doc, obj, &cycle);
75 pdf_set_usecmap(ctx, cmap, usecmap);
76 }
77 else if (strlen(cmap->usecmap_name) > 0)
78 {
79 fz_try(ctx)
80 {
81 usecmap = pdf_load_system_cmap(ctx, cmap->usecmap_name);
82 pdf_set_usecmap(ctx, cmap, usecmap);
83 }
84 fz_catch(ctx)
85 {
86 fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
87 fz_report_error(ctx);
88 fz_warn(ctx, "cannot load system CMap: %s", pdf_to_name(ctx, obj));
89 }
90 }
91
92 pdf_store_item(ctx, stmobj, cmap, pdf_cmap_size(ctx, cmap));
93 }
94 fz_always(ctx)
95 {
96 fz_drop_stream(ctx, file);
97 pdf_drop_cmap(ctx, usecmap);
98 }
99 fz_catch(ctx)
100 {
101 pdf_drop_cmap(ctx, cmap);
102 fz_rethrow(ctx);
103 }
104
105 return cmap;
106 }
107
108 pdf_cmap *
109 pdf_load_embedded_cmap(fz_context *ctx, pdf_document *doc, pdf_obj *stmobj)
110 {
111 return pdf_load_embedded_cmap_imp(ctx, doc, stmobj, NULL);
112 }
113
114 pdf_cmap *
115 pdf_new_identity_cmap(fz_context *ctx, int wmode, int bytes)
116 {
117 pdf_cmap *cmap = pdf_new_cmap(ctx);
118 fz_try(ctx)
119 {
120 unsigned int high = (1 << (bytes * 8)) - 1;
121 if (wmode)
122 fz_strlcpy(cmap->cmap_name, "Identity-V", sizeof cmap->cmap_name);
123 else
124 fz_strlcpy(cmap->cmap_name, "Identity-H", sizeof cmap->cmap_name);
125 pdf_add_codespace(ctx, cmap, 0, high, bytes);
126 pdf_map_range_to_range(ctx, cmap, 0, high, 0);
127 pdf_sort_cmap(ctx, cmap);
128 pdf_set_cmap_wmode(ctx, cmap, wmode);
129 }
130 fz_catch(ctx)
131 {
132 pdf_drop_cmap(ctx, cmap);
133 fz_rethrow(ctx);
134 }
135 return cmap;
136 }
137
138 #ifdef NO_CJK
139
140 pdf_cmap *
141 pdf_load_builtin_cmap(fz_context *ctx, const char *name)
142 {
143 if (!strcmp(name, "Identity-H")) return pdf_new_identity_cmap(ctx, 0, 2);
144 if (!strcmp(name, "Identity-V")) return pdf_new_identity_cmap(ctx, 1, 2);
145 if (!strcmp(name, "TrueType-UCS2")) return &cmap_TrueType_UCS2;
146 return NULL;
147 }
148
149 #else
150
151 /* To regenerate this list: :r !bash scripts/runcmapdump.sh */
152
153 #include "cmaps/83pv-RKSJ-H.h"
154 #include "cmaps/90ms-RKSJ-H.h"
155 #include "cmaps/90ms-RKSJ-V.h"
156 #include "cmaps/90msp-RKSJ-H.h"
157 #include "cmaps/90msp-RKSJ-V.h"
158 #include "cmaps/90pv-RKSJ-H.h"
159 #include "cmaps/Add-RKSJ-H.h"
160 #include "cmaps/Add-RKSJ-V.h"
161 #include "cmaps/Adobe-CNS1-UCS2.h"
162 #include "cmaps/Adobe-GB1-UCS2.h"
163 #include "cmaps/Adobe-Japan1-UCS2.h"
164 #include "cmaps/Adobe-Korea1-UCS2.h"
165 #include "cmaps/B5pc-H.h"
166 #include "cmaps/B5pc-V.h"
167 #include "cmaps/CNS-EUC-H.h"
168 #include "cmaps/CNS-EUC-V.h"
169 #include "cmaps/ETen-B5-H.h"
170 #include "cmaps/ETen-B5-V.h"
171 #include "cmaps/ETenms-B5-H.h"
172 #include "cmaps/ETenms-B5-V.h"
173 #include "cmaps/EUC-H.h"
174 #include "cmaps/EUC-V.h"
175 #include "cmaps/Ext-RKSJ-H.h"
176 #include "cmaps/Ext-RKSJ-V.h"
177 #include "cmaps/GB-EUC-H.h"
178 #include "cmaps/GB-EUC-V.h"
179 #include "cmaps/GBK-EUC-H.h"
180 #include "cmaps/GBK-EUC-V.h"
181 #include "cmaps/GBK-X.h"
182 #include "cmaps/GBK2K-H.h"
183 #include "cmaps/GBK2K-V.h"
184 #include "cmaps/GBKp-EUC-H.h"
185 #include "cmaps/GBKp-EUC-V.h"
186 #include "cmaps/GBpc-EUC-H.h"
187 #include "cmaps/GBpc-EUC-V.h"
188 #include "cmaps/H.h"
189 #include "cmaps/HKscs-B5-H.h"
190 #include "cmaps/HKscs-B5-V.h"
191 #include "cmaps/Identity-H.h"
192 #include "cmaps/Identity-V.h"
193 #include "cmaps/KSC-EUC-H.h"
194 #include "cmaps/KSC-EUC-V.h"
195 #include "cmaps/KSCms-UHC-H.h"
196 #include "cmaps/KSCms-UHC-HW-H.h"
197 #include "cmaps/KSCms-UHC-HW-V.h"
198 #include "cmaps/KSCms-UHC-V.h"
199 #include "cmaps/KSCpc-EUC-H.h"
200 #include "cmaps/UniCNS-UCS2-H.h"
201 #include "cmaps/UniCNS-UCS2-V.h"
202 #include "cmaps/UniCNS-UTF16-H.h"
203 #include "cmaps/UniCNS-UTF16-V.h"
204 #include "cmaps/UniCNS-X.h"
205 #include "cmaps/UniGB-UCS2-H.h"
206 #include "cmaps/UniGB-UCS2-V.h"
207 #include "cmaps/UniGB-UTF16-H.h"
208 #include "cmaps/UniGB-UTF16-V.h"
209 #include "cmaps/UniGB-X.h"
210 #include "cmaps/UniJIS-UCS2-H.h"
211 #include "cmaps/UniJIS-UCS2-HW-H.h"
212 #include "cmaps/UniJIS-UCS2-HW-V.h"
213 #include "cmaps/UniJIS-UCS2-V.h"
214 #include "cmaps/UniJIS-UTF16-H.h"
215 #include "cmaps/UniJIS-UTF16-V.h"
216 #include "cmaps/UniJIS-X.h"
217 #include "cmaps/UniKS-UCS2-H.h"
218 #include "cmaps/UniKS-UCS2-V.h"
219 #include "cmaps/UniKS-UTF16-H.h"
220 #include "cmaps/UniKS-UTF16-V.h"
221 #include "cmaps/UniKS-X.h"
222 #include "cmaps/V.h"
223
224 static pdf_cmap *table[] = {
225 &cmap_83pv_RKSJ_H,
226 &cmap_90ms_RKSJ_H,
227 &cmap_90ms_RKSJ_V,
228 &cmap_90msp_RKSJ_H,
229 &cmap_90msp_RKSJ_V,
230 &cmap_90pv_RKSJ_H,
231 &cmap_Add_RKSJ_H,
232 &cmap_Add_RKSJ_V,
233 &cmap_Adobe_CNS1_UCS2,
234 &cmap_Adobe_GB1_UCS2,
235 &cmap_Adobe_Japan1_UCS2,
236 &cmap_Adobe_Korea1_UCS2,
237 &cmap_B5pc_H,
238 &cmap_B5pc_V,
239 &cmap_CNS_EUC_H,
240 &cmap_CNS_EUC_V,
241 &cmap_ETen_B5_H,
242 &cmap_ETen_B5_V,
243 &cmap_ETenms_B5_H,
244 &cmap_ETenms_B5_V,
245 &cmap_EUC_H,
246 &cmap_EUC_V,
247 &cmap_Ext_RKSJ_H,
248 &cmap_Ext_RKSJ_V,
249 &cmap_GB_EUC_H,
250 &cmap_GB_EUC_V,
251 &cmap_GBK_EUC_H,
252 &cmap_GBK_EUC_V,
253 &cmap_GBK_X,
254 &cmap_GBK2K_H,
255 &cmap_GBK2K_V,
256 &cmap_GBKp_EUC_H,
257 &cmap_GBKp_EUC_V,
258 &cmap_GBpc_EUC_H,
259 &cmap_GBpc_EUC_V,
260 &cmap_H,
261 &cmap_HKscs_B5_H,
262 &cmap_HKscs_B5_V,
263 &cmap_Identity_H,
264 &cmap_Identity_V,
265 &cmap_KSC_EUC_H,
266 &cmap_KSC_EUC_V,
267 &cmap_KSCms_UHC_H,
268 &cmap_KSCms_UHC_HW_H,
269 &cmap_KSCms_UHC_HW_V,
270 &cmap_KSCms_UHC_V,
271 &cmap_KSCpc_EUC_H,
272 &cmap_TrueType_UCS2,
273 &cmap_UniCNS_UCS2_H,
274 &cmap_UniCNS_UCS2_V,
275 &cmap_UniCNS_UTF16_H,
276 &cmap_UniCNS_UTF16_V,
277 &cmap_UniCNS_X,
278 &cmap_UniGB_UCS2_H,
279 &cmap_UniGB_UCS2_V,
280 &cmap_UniGB_UTF16_H,
281 &cmap_UniGB_UTF16_V,
282 &cmap_UniGB_X,
283 &cmap_UniJIS_UCS2_H,
284 &cmap_UniJIS_UCS2_HW_H,
285 &cmap_UniJIS_UCS2_HW_V,
286 &cmap_UniJIS_UCS2_V,
287 &cmap_UniJIS_UTF16_H,
288 &cmap_UniJIS_UTF16_V,
289 &cmap_UniJIS_X,
290 &cmap_UniKS_UCS2_H,
291 &cmap_UniKS_UCS2_V,
292 &cmap_UniKS_UTF16_H,
293 &cmap_UniKS_UTF16_V,
294 &cmap_UniKS_X,
295 &cmap_V,
296 };
297
298 pdf_cmap *
299 pdf_load_builtin_cmap(fz_context *ctx, const char *name)
300 {
301 int r = nelem(table)-1;
302 int l = 0;
303 while (l <= r)
304 {
305 int m = (l + r) >> 1;
306 int c = strcmp(name, table[m]->cmap_name);
307 if (c < 0)
308 r = m - 1;
309 else if (c > 0)
310 l = m + 1;
311 else
312 return table[m];
313 }
314 return NULL;
315 }
316
317 #endif
318
319 pdf_cmap *
320 pdf_load_system_cmap(fz_context *ctx, const char *cmap_name)
321 {
322 pdf_cmap *usecmap;
323 pdf_cmap *cmap;
324
325 cmap = pdf_load_builtin_cmap(ctx, cmap_name);
326 if (!cmap)
327 fz_throw(ctx, FZ_ERROR_FORMAT, "no builtin cmap file: %s", cmap_name);
328
329 if (cmap->usecmap_name[0] && !cmap->usecmap)
330 {
331 usecmap = pdf_load_system_cmap(ctx, cmap->usecmap_name);
332 if (!usecmap)
333 fz_throw(ctx, FZ_ERROR_FORMAT, "no builtin cmap file: %s", cmap->usecmap_name);
334 pdf_set_usecmap(ctx, cmap, usecmap);
335 }
336
337 return cmap;
338 }