comparison mupdf-source/source/pdf/pdf-font.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 // Copyright (C) 2004-2025 Artifex Software, Inc.
2 //
3 // This file is part of MuPDF.
4 //
5 // MuPDF is free software: you can redistribute it and/or modify it under the
6 // terms of the GNU Affero General Public License as published by the Free
7 // Software Foundation, either version 3 of the License, or (at your option)
8 // any later version.
9 //
10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13 // details.
14 //
15 // You should have received a copy of the GNU Affero General Public License
16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17 //
18 // Alternative licensing terms are available from the licensor.
19 // For commercial licensing, see <https://www.artifex.com/> or contact
20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21 // CA 94129, USA, for further information.
22
23 #include "mupdf/fitz.h"
24 #include "mupdf/pdf.h"
25
26 #include <assert.h>
27
28 #include <ft2build.h>
29 #include FT_FREETYPE_H
30 #include FT_ADVANCES_H
31 #ifdef FT_FONT_FORMATS_H
32 #include FT_FONT_FORMATS_H
33 #else
34 #include FT_XFREE86_H
35 #endif
36 #include FT_TRUETYPE_TABLES_H
37
38 #ifndef FT_SFNT_HEAD
39 #define FT_SFNT_HEAD ft_sfnt_head
40 #endif
41
42 void
43 pdf_load_encoding(const char **estrings, const char *encoding)
44 {
45 const char * const *bstrings = NULL;
46 int i;
47
48 if (!strcmp(encoding, "StandardEncoding"))
49 bstrings = fz_glyph_name_from_adobe_standard;
50 if (!strcmp(encoding, "MacRomanEncoding"))
51 bstrings = fz_glyph_name_from_mac_roman;
52 if (!strcmp(encoding, "MacExpertEncoding"))
53 bstrings = fz_glyph_name_from_mac_expert;
54 if (!strcmp(encoding, "WinAnsiEncoding"))
55 bstrings = fz_glyph_name_from_win_ansi;
56
57 if (bstrings)
58 for (i = 0; i < 256; i++)
59 estrings[i] = bstrings[i];
60 }
61
62 static void pdf_load_font_descriptor(fz_context *ctx, pdf_document *doc, pdf_font_desc *fontdesc, pdf_obj *dict,
63 const char *collection, const char *basefont, int iscidfont);
64
/*
 * Alias table for font names: the first entry of each row is the
 * canonical name, the remaining entries (up to the NULL terminator) are
 * alternative spellings that map to it.
 *
 * The table is read-only, so both the strings and the pointer array are
 * const-qualified.
 */
static const char * const base_font_names[][10] =
{
	{ "Courier", "CourierNew", "CourierNewPSMT", NULL },
	{ "Courier-Bold", "CourierNew,Bold", "Courier,Bold",
		"CourierNewPS-BoldMT", "CourierNew-Bold", NULL },
	{ "Courier-Oblique", "CourierNew,Italic", "Courier,Italic",
		"CourierNewPS-ItalicMT", "CourierNew-Italic", NULL },
	{ "Courier-BoldOblique", "CourierNew,BoldItalic", "Courier,BoldItalic",
		"CourierNewPS-BoldItalicMT", "CourierNew-BoldItalic", NULL },
	{ "Helvetica", "ArialMT", "Arial", NULL },
	{ "Helvetica-Bold", "Arial-BoldMT", "Arial,Bold", "Arial-Bold",
		"Helvetica,Bold", NULL },
	{ "Helvetica-Oblique", "Arial-ItalicMT", "Arial,Italic", "Arial-Italic",
		"Helvetica,Italic", "Helvetica-Italic", NULL },
	{ "Helvetica-BoldOblique", "Arial-BoldItalicMT",
		"Arial,BoldItalic", "Arial-BoldItalic",
		"Helvetica,BoldItalic", "Helvetica-BoldItalic", NULL },
	{ "Times-Roman", "TimesNewRomanPSMT", "TimesNewRoman",
		"TimesNewRomanPS", NULL },
	{ "Times-Bold", "TimesNewRomanPS-BoldMT", "TimesNewRoman,Bold",
		"TimesNewRomanPS-Bold", "TimesNewRoman-Bold", NULL },
	{ "Times-Italic", "TimesNewRomanPS-ItalicMT", "TimesNewRoman,Italic",
		"TimesNewRomanPS-Italic", "TimesNewRoman-Italic", NULL },
	{ "Times-BoldItalic", "TimesNewRomanPS-BoldItalicMT",
		"TimesNewRoman,BoldItalic", "TimesNewRomanPS-BoldItalic",
		"TimesNewRoman-BoldItalic", NULL },
	{ "Symbol", "Symbol,Italic", "Symbol,Bold", "Symbol,BoldItalic",
		"SymbolMT", "SymbolMT,Italic", "SymbolMT,Bold", "SymbolMT,BoldItalic", NULL },
	{ "ZapfDingbats", NULL }
};
95
96 const unsigned char *
97 pdf_lookup_substitute_font(fz_context *ctx, int mono, int serif, int bold, int italic, int *len)
98 {
99 if (mono) {
100 if (bold) {
101 if (italic) return fz_lookup_base14_font(ctx, "Courier-BoldOblique", len);
102 else return fz_lookup_base14_font(ctx, "Courier-Bold", len);
103 } else {
104 if (italic) return fz_lookup_base14_font(ctx, "Courier-Oblique", len);
105 else return fz_lookup_base14_font(ctx, "Courier", len);
106 }
107 } else if (serif) {
108 if (bold) {
109 if (italic) return fz_lookup_base14_font(ctx, "Times-BoldItalic", len);
110 else return fz_lookup_base14_font(ctx, "Times-Bold", len);
111 } else {
112 if (italic) return fz_lookup_base14_font(ctx, "Times-Italic", len);
113 else return fz_lookup_base14_font(ctx, "Times-Roman", len);
114 }
115 } else {
116 if (bold) {
117 if (italic) return fz_lookup_base14_font(ctx, "Helvetica-BoldOblique", len);
118 else return fz_lookup_base14_font(ctx, "Helvetica-Bold", len);
119 } else {
120 if (italic) return fz_lookup_base14_font(ctx, "Helvetica-Oblique", len);
121 else return fz_lookup_base14_font(ctx, "Helvetica", len);
122 }
123 }
124 }
125
/*
 * Name-based heuristic (presumably identifying DynaLab fonts, going by
 * the function name).
 *
 * Returns 1 if 'name' contains "HuaTian" or "MingLi", or if "DF" / "DLC"
 * occurs either at the very start of the name or directly after a '+';
 * returns 0 otherwise.
 */
static int is_dynalab(char *name)
{
	static const char * const anywhere[] = { "HuaTian", "MingLi", "+DF", "+DLC" };
	size_t k;

	/* Prefix matches. */
	if (strncmp(name, "DF", 2) == 0 || strncmp(name, "DLC", 3) == 0)
		return 1;

	/* Substring matches. */
	for (k = 0; k < sizeof anywhere / sizeof anywhere[0]; k++)
		if (strstr(name, anywhere[k]) != NULL)
			return 1;

	return 0;
}
138
/*
 * Compare two strings while ignoring every ' ' character in both.
 *
 * Returns 0 if the strings are equal once spaces are skipped, and 1
 * otherwise (this is an equality test only; no ordering is implied).
 */
static int strcmp_ignore_space(const char *a, const char *b)
{
	for (;;)
	{
		while (*a == ' ')
			a++;
		while (*b == ' ')
			b++;
		if (*a != *b)
			return 1;
		/* Here *a == *b, so reaching a terminator means both strings ended. */
		if (*a == 0)
			return 0;
		a++;
		b++;
	}
}
157
158 const char *pdf_clean_font_name(const char *fontname)
159 {
160 int i, k;
161 for (i = 0; i < (int)nelem(base_font_names); i++)
162 for (k = 0; base_font_names[i][k]; k++)
163 if (!strcmp_ignore_space(base_font_names[i][k], fontname))
164 return base_font_names[i][0];
165 return fontname;
166 }
167
168 /*
169 * FreeType and Rendering glue
170 */
171
172 enum { UNKNOWN, TYPE1, TRUETYPE };
173
174 static int ft_kind(fz_context *ctx, FT_Face face)
175 {
176 const char *kind;
177 fz_ft_lock(ctx);
178 #ifdef FT_FONT_FORMATS_H
179 kind = FT_Get_Font_Format(face);
180 #else
181 kind = FT_Get_X11_Font_Format(face);
182 #endif
183 fz_ft_unlock(ctx);
184 if (!strcmp(kind, "TrueType")) return TRUETYPE;
185 if (!strcmp(kind, "Type 1")) return TYPE1;
186 if (!strcmp(kind, "CFF")) return TYPE1;
187 if (!strcmp(kind, "CID Type 1")) return TYPE1;
188 return UNKNOWN;
189 }
190
191 static int ft_cid_to_gid(pdf_font_desc *fontdesc, int cid)
192 {
193 if (fontdesc->to_ttf_cmap)
194 {
195 cid = pdf_lookup_cmap(fontdesc->to_ttf_cmap, cid);
196
197 /* vertical presentation forms */
198 if (fontdesc->font->flags.ft_substitute && fontdesc->wmode)
199 {
200 switch (cid)
201 {
202 case 0x0021: cid = 0xFE15; break; /* ! */
203 case 0x0028: cid = 0xFE35; break; /* ( */
204 case 0x0029: cid = 0xFE36; break; /* ) */
205 case 0x002C: cid = 0xFE10; break; /* , */
206 case 0x003A: cid = 0xFE13; break; /* : */
207 case 0x003B: cid = 0xFE14; break; /* ; */
208 case 0x003F: cid = 0xFE16; break; /* ? */
209 case 0x005B: cid = 0xFE47; break; /* [ */
210 case 0x005D: cid = 0xFE48; break; /* ] */
211 case 0x005F: cid = 0xFE33; break; /* _ */
212 case 0x007B: cid = 0xFE37; break; /* { */
213 case 0x007D: cid = 0xFE38; break; /* } */
214 case 0x2013: cid = 0xFE32; break; /* EN DASH */
215 case 0x2014: cid = 0xFE31; break; /* EM DASH */
216 case 0x2025: cid = 0xFE30; break; /* TWO DOT LEADER */
217 case 0x2026: cid = 0xFE19; break; /* HORIZONTAL ELLIPSIS */
218 case 0x3001: cid = 0xFE11; break; /* IDEOGRAPHIC COMMA */
219 case 0x3002: cid = 0xFE12; break; /* IDEOGRAPHIC FULL STOP */
220 case 0x3008: cid = 0xFE3F; break; /* OPENING ANGLE BRACKET */
221 case 0x3009: cid = 0xFE40; break; /* CLOSING ANGLE BRACKET */
222 case 0x300A: cid = 0xFE3D; break; /* LEFT DOUBLE ANGLE BRACKET */
223 case 0x300B: cid = 0xFE3E; break; /* RIGHT DOUBLE ANGLE BRACKET */
224 case 0x300C: cid = 0xFE41; break; /* LEFT CORNER BRACKET */
225 case 0x300D: cid = 0xFE42; break; /* RIGHT CORNER BRACKET */
226 case 0x300E: cid = 0xFE43; break; /* LEFT WHITE CORNER BRACKET */
227 case 0x300F: cid = 0xFE44; break; /* RIGHT WHITE CORNER BRACKET */
228 case 0x3010: cid = 0xFE3B; break; /* LEFT BLACK LENTICULAR BRACKET */
229 case 0x3011: cid = 0xFE3C; break; /* RIGHT BLACK LENTICULAR BRACKET */
230 case 0x3014: cid = 0xFE39; break; /* LEFT TORTOISE SHELL BRACKET */
231 case 0x3015: cid = 0xFE3A; break; /* RIGHT TORTOISE SHELL BRACKET */
232 case 0x3016: cid = 0xFE17; break; /* LEFT WHITE LENTICULAR BRACKET */
233 case 0x3017: cid = 0xFE18; break; /* RIGHT WHITE LENTICULAR BRACKET */
234
235 case 0xFF01: cid = 0xFE15; break; /* FULLWIDTH EXCLAMATION MARK */
236 case 0xFF08: cid = 0xFE35; break; /* FULLWIDTH LEFT PARENTHESIS */
237 case 0xFF09: cid = 0xFE36; break; /* FULLWIDTH RIGHT PARENTHESIS */
238 case 0xFF0C: cid = 0xFE10; break; /* FULLWIDTH COMMA */
239 case 0xFF1A: cid = 0xFE13; break; /* FULLWIDTH COLON */
240 case 0xFF1B: cid = 0xFE14; break; /* FULLWIDTH SEMICOLON */
241 case 0xFF1F: cid = 0xFE16; break; /* FULLWIDTH QUESTION MARK */
242 case 0xFF3B: cid = 0xFE47; break; /* FULLWIDTH LEFT SQUARE BRACKET */
243 case 0xFF3D: cid = 0xFE48; break; /* FULLWIDTH RIGHT SQUARE BRACKET */
244 case 0xFF3F: cid = 0xFE33; break; /* FULLWIDTH LOW LINE */
245 case 0xFF5B: cid = 0xFE37; break; /* FULLWIDTH LEFT CURLY BRACKET */
246 case 0xFF5D: cid = 0xFE38; break; /* FULLWIDTH RIGHT CURLY BRACKET */
247
248 case 0x30FC: cid = 0xFE31; break; /* KATAKANA-HIRAGANA PROLONGED SOUND MARK */
249 case 0xFF0D: cid = 0xFE31; break; /* FULLWIDTH HYPHEN-MINUS */
250 }
251 }
252
253 return ft_char_index(fontdesc->font->ft_face, cid);
254 }
255
256 if (fontdesc->cid_to_gid && (size_t)cid < fontdesc->cid_to_gid_len && cid >= 0)
257 return fontdesc->cid_to_gid[cid];
258
259 return cid;
260 }
261
262 int
263 pdf_font_cid_to_gid(fz_context *ctx, pdf_font_desc *fontdesc, int cid)
264 {
265 if (fontdesc->font->ft_face)
266 {
267 int gid;
268 fz_ft_lock(ctx);
269 gid = ft_cid_to_gid(fontdesc, cid);
270 fz_ft_unlock(ctx);
271 return gid;
272 }
273 return cid;
274 }
275
276 static int ft_width(fz_context *ctx, pdf_font_desc *fontdesc, int cid)
277 {
278 int mask = FT_LOAD_NO_SCALE | FT_LOAD_NO_HINTING | FT_LOAD_NO_BITMAP | FT_LOAD_IGNORE_TRANSFORM;
279 int gid = ft_cid_to_gid(fontdesc, cid);
280 FT_Fixed adv = 0;
281 int fterr;
282 FT_Face face = fontdesc->font->ft_face;
283 FT_UShort units_per_EM;
284
285 fterr = FT_Get_Advance(face, gid, mask, &adv);
286 if (fterr && fterr != FT_Err_Invalid_Argument)
287 fz_warn(ctx, "FT_Get_Advance(%d): %s", gid, ft_error_string(fterr));
288
289 units_per_EM = face->units_per_EM;
290 if (units_per_EM == 0)
291 units_per_EM = 2048;
292
293 return adv * 1000 / units_per_EM;
294 }
295
296 static const struct { int code; const char *name; } mre_diff_table[] =
297 {
298 { 173, "notequal" },
299 { 176, "infinity" },
300 { 178, "lessequal" },
301 { 179, "greaterequal" },
302 { 182, "partialdiff" },
303 { 183, "summation" },
304 { 184, "product" },
305 { 185, "pi" },
306 { 186, "integral" },
307 { 189, "Omega" },
308 { 195, "radical" },
309 { 197, "approxequal" },
310 { 198, "Delta" },
311 { 215, "lozenge" },
312 { 219, "Euro" },
313 { 240, "apple" },
314 };
315
316 static int lookup_mre_code(const char *name)
317 {
318 int i;
319 for (i = 0; i < (int)nelem(mre_diff_table); ++i)
320 if (!strcmp(name, mre_diff_table[i].name))
321 return mre_diff_table[i].code;
322 for (i = 0; i < 256; i++)
323 if (fz_glyph_name_from_mac_roman[i] && !strcmp(name, fz_glyph_name_from_mac_roman[i]))
324 return i;
325 return -1;
326 }
327
328 static int ft_find_glyph_by_unicode_name(FT_Face face, const char *name)
329 {
330 int unicode, glyph;
331
332 /* Prefer exact unicode match if available. */
333 unicode = fz_unicode_from_glyph_name_strict(name);
334 if (unicode > 0)
335 {
336 glyph = ft_char_index(face, unicode);
337 if (glyph > 0)
338 return glyph;
339 }
340
341 /* Fall back to font glyph name if we can. */
342 glyph = ft_name_index(face, name);
343 if (glyph > 0)
344 return glyph;
345
346 /* Fuzzy unicode match as last attempt. */
347 unicode = fz_unicode_from_glyph_name(name);
348 if (unicode > 0)
349 return ft_char_index(face, unicode);
350
351 /* Failed. */
352 return 0;
353 }
354
355 /*
356 * Load font files.
357 */
358
359 static void
360 pdf_load_builtin_font(fz_context *ctx, pdf_font_desc *fontdesc, const char *fontname, int has_descriptor)
361 {
362 FT_Face face;
363 const char *clean_name = pdf_clean_font_name(fontname);
364 if (clean_name == fontname)
365 clean_name = "Times-Roman";
366
367 fontdesc->font = fz_load_system_font(ctx, fontname, 0, 0, !has_descriptor);
368 if (!fontdesc->font)
369 {
370 const unsigned char *data;
371 int len;
372
373 data = fz_lookup_base14_font(ctx, clean_name, &len);
374 if (!data)
375 fz_throw(ctx, FZ_ERROR_SYNTAX, "cannot find builtin font: '%s'", fontname);
376
377 fontdesc->font = fz_new_font_from_memory(ctx, fontname, data, len, 0, 1);
378 fontdesc->font->flags.is_serif = !!strstr(clean_name, "Times");
379 }
380
381 if (!strcmp(clean_name, "Symbol") || !strcmp(clean_name, "ZapfDingbats"))
382 fontdesc->flags |= PDF_FD_SYMBOLIC;
383
384 face = fontdesc->font->ft_face;
385 fontdesc->ascent = 1000.0f * face->ascender / face->units_per_EM;
386 fontdesc->descent = 1000.0f * face->descender / face->units_per_EM;
387 }
388
389 static void
390 pdf_load_substitute_font(fz_context *ctx, pdf_font_desc *fontdesc, const char *fontname, int mono, int serif, int bold, int italic)
391 {
392 fontdesc->font = fz_load_system_font(ctx, fontname, bold, italic, 0);
393 if (!fontdesc->font)
394 {
395 const unsigned char *data;
396 int len;
397
398 data = pdf_lookup_substitute_font(ctx, mono, serif, bold, italic, &len);
399 if (!data)
400 fz_throw(ctx, FZ_ERROR_SYNTAX, "cannot find substitute font");
401
402 fontdesc->font = fz_new_font_from_memory(ctx, fontname, data, len, 0, 1);
403 fontdesc->font->flags.fake_bold = bold && !fontdesc->font->flags.is_bold;
404 fontdesc->font->flags.fake_italic = italic && !fontdesc->font->flags.is_italic;
405
406 fontdesc->font->flags.is_mono = mono;
407 fontdesc->font->flags.is_serif = serif;
408 fontdesc->font->flags.is_bold = bold;
409 fontdesc->font->flags.is_italic = italic;
410 }
411
412 fontdesc->font->flags.ft_substitute = 1;
413 fontdesc->font->flags.ft_stretch = 1;
414 }
415
416 static void
417 pdf_load_substitute_cjk_font(fz_context *ctx, pdf_font_desc *fontdesc, const char *fontname, int ros, int serif)
418 {
419 fontdesc->font = fz_load_system_cjk_font(ctx, fontname, ros, serif);
420 if (!fontdesc->font)
421 {
422 const unsigned char *data;
423 int size;
424 int subfont;
425
426 data = fz_lookup_cjk_font(ctx, ros, &size, &subfont);
427 if (!data)
428 fz_throw(ctx, FZ_ERROR_SYNTAX, "cannot find builtin CJK font");
429
430 /* A glyph bbox cache is too big for CJK fonts. */
431 fontdesc->font = fz_new_font_from_memory(ctx, fontname, data, size, subfont, 0);
432 }
433
434 fontdesc->font->flags.ft_substitute = 1;
435 fontdesc->font->flags.ft_stretch = 0;
436 fontdesc->font->flags.cjk = 1;
437 fontdesc->font->flags.cjk_lang = ros;
438 }
439
440 static struct { int ros, serif; const char *name; } known_cjk_fonts[] = {
441 { FZ_ADOBE_GB, 1, "SimFang" },
442 { FZ_ADOBE_GB, 0, "SimHei" },
443 { FZ_ADOBE_GB, 1, "SimKai" },
444 { FZ_ADOBE_GB, 1, "SimLi" },
445 { FZ_ADOBE_GB, 1, "SimSun" },
446 { FZ_ADOBE_GB, 1, "Song" },
447
448 { FZ_ADOBE_CNS, 1, "MingLiU" },
449
450 { FZ_ADOBE_JAPAN, 0, "Gothic" },
451 { FZ_ADOBE_JAPAN, 1, "Mincho" },
452
453 { FZ_ADOBE_KOREA, 1, "Batang" },
454 { FZ_ADOBE_KOREA, 0, "Gulim" },
455 { FZ_ADOBE_KOREA, 0, "Dotum" },
456 };
457
/* Return 1 if 'ref' occurs anywhere inside 's', 0 otherwise. */
static int match_font_name(const char *s, const char *ref)
{
	if (strstr(s, ref) != NULL)
		return 1;
	return 0;
}
462
463 static void
464 pdf_load_system_font(fz_context *ctx, pdf_font_desc *fontdesc, const char *fontname, const char *collection)
465 {
466 int bold = 0;
467 int italic = 0;
468 int serif = 0;
469 int mono = 0;
470
471 if (strstr(fontname, "Bold"))
472 bold = 1;
473 if (strstr(fontname, "Italic"))
474 italic = 1;
475 if (strstr(fontname, "Oblique"))
476 italic = 1;
477
478 if (fontdesc->flags & PDF_FD_FIXED_PITCH)
479 mono = 1;
480 if (fontdesc->flags & PDF_FD_SERIF)
481 serif = 1;
482 if (fontdesc->flags & PDF_FD_ITALIC)
483 italic = 1;
484 if (fontdesc->flags & PDF_FD_FORCE_BOLD)
485 bold = 1;
486
487 if (collection)
488 {
489 if (!strcmp(collection, "Adobe-CNS1"))
490 pdf_load_substitute_cjk_font(ctx, fontdesc, fontname, FZ_ADOBE_CNS, serif);
491 else if (!strcmp(collection, "Adobe-GB1"))
492 pdf_load_substitute_cjk_font(ctx, fontdesc, fontname, FZ_ADOBE_GB, serif);
493 else if (!strcmp(collection, "Adobe-Japan1"))
494 pdf_load_substitute_cjk_font(ctx, fontdesc, fontname, FZ_ADOBE_JAPAN, serif);
495 else if (!strcmp(collection, "Adobe-Korea1"))
496 pdf_load_substitute_cjk_font(ctx, fontdesc, fontname, FZ_ADOBE_KOREA, serif);
497 else
498 {
499 size_t i;
500 if (strcmp(collection, "Adobe-Identity") != 0)
501 fz_warn(ctx, "unknown cid collection: %s", collection);
502
503 // Recognize common CJK fonts when using Identity or other non-CJK CMap
504 for (i = 0; i < nelem(known_cjk_fonts); ++i)
505 {
506 if (match_font_name(fontname, known_cjk_fonts[i].name))
507 {
508 pdf_load_substitute_cjk_font(ctx, fontdesc, fontname,
509 known_cjk_fonts[i].ros, known_cjk_fonts[i].serif);
510 return;
511 }
512 }
513
514 pdf_load_substitute_font(ctx, fontdesc, fontname, mono, serif, bold, italic);
515 }
516 }
517 else
518 {
519 pdf_load_substitute_font(ctx, fontdesc, fontname, mono, serif, bold, italic);
520 }
521 }
522
523 #define TTF_U16(p) ((uint16_t) ((p)[0]<<8) | ((p)[1]))
524 #define TTF_U32(p) ((uint32_t) ((p)[0]<<24) | ((p)[1]<<16) | ((p)[2]<<8) | ((p)[3]))
525
526 static fz_buffer *
527 pdf_extract_cff_subtable(fz_context *ctx, unsigned char *data, size_t size)
528 {
529 size_t num_tables = TTF_U16(data + 4);
530 size_t i;
531
532 if (12 + num_tables * 16 > size)
533 fz_throw(ctx, FZ_ERROR_SYNTAX, "invalid TTF header");
534
535 for (i = 0; i < num_tables; ++i)
536 {
537 unsigned char *record = data + 12 + i * 16;
538 if (!memcmp("CFF ", record, 4))
539 {
540 uint64_t offset = TTF_U32(record + 8);
541 uint64_t length = TTF_U32(record + 12);
542 uint64_t end = offset + length;
543 if (end > size)
544 fz_throw(ctx, FZ_ERROR_SYNTAX, "invalid TTF subtable offset/length");
545 return fz_new_buffer_from_copied_data(ctx, data + offset, length);
546 }
547 }
548
549 return NULL;
550 }
551
552 static void
553 pdf_load_embedded_font(fz_context *ctx, pdf_document *doc, pdf_font_desc *fontdesc, const char *fontname, pdf_obj *stmref)
554 {
555 fz_buffer *buf;
556 unsigned char *data;
557 size_t size;
558
559 fz_var(buf);
560
561 buf = pdf_load_stream(ctx, stmref);
562
563 fz_try(ctx)
564 {
565 /* Extract CFF subtable for OpenType fonts: */
566 size = fz_buffer_storage(ctx, buf, &data);
567 if (size > 12) {
568 if (!memcmp("OTTO", data, 4)) {
569 fz_buffer *cff = pdf_extract_cff_subtable(ctx, data, size);
570 if (cff)
571 {
572 fz_drop_buffer(ctx, buf);
573 buf = cff;
574 }
575 }
576 }
577
578 fontdesc->font = fz_new_font_from_buffer(ctx, fontname, buf, 0, 1);
579 }
580 fz_always(ctx)
581 fz_drop_buffer(ctx, buf);
582 fz_catch(ctx)
583 fz_rethrow(ctx);
584
585 fontdesc->size += fz_buffer_storage(ctx, buf, NULL);
586 fontdesc->is_embedded = 1;
587 }
588
589 /*
590 * Create and destroy
591 */
592
/*
 * Keep a font descriptor: forwards to fz_keep_storable on the
 * descriptor's embedded storable and returns that call's result.
 */
593 pdf_font_desc *
594 pdf_keep_font(fz_context *ctx, pdf_font_desc *fontdesc)
595 {
596 return fz_keep_storable(ctx, &fontdesc->storable);
597 }
598
/*
 * Drop a font descriptor: forwards to fz_drop_storable on the
 * descriptor's embedded storable. The destructor registered in
 * pdf_new_font_desc is pdf_drop_font_imp.
 */
599 void
600 pdf_drop_font(fz_context *ctx, pdf_font_desc *fontdesc)
601 {
602 fz_drop_storable(ctx, &fontdesc->storable);
603 }
604
605 static int
606 pdf_font_is_droppable(fz_context *ctx, fz_storable *fontdesc)
607 {
608 /* If we aren't holding the FT lock, then we can drop. */
609 return !fz_ft_lock_held(ctx);
610 }
611
612 static void
613 pdf_drop_font_imp(fz_context *ctx, fz_storable *fontdesc_)
614 {
615 pdf_font_desc *fontdesc = (pdf_font_desc *)fontdesc_;
616
617 fz_drop_font(ctx, fontdesc->font);
618 pdf_drop_cmap(ctx, fontdesc->encoding);
619 pdf_drop_cmap(ctx, fontdesc->to_ttf_cmap);
620 pdf_drop_cmap(ctx, fontdesc->to_unicode);
621 fz_free(ctx, fontdesc->cid_to_gid);
622 fz_free(ctx, fontdesc->cid_to_ucs);
623 fz_free(ctx, fontdesc->hmtx);
624 fz_free(ctx, fontdesc->vmtx);
625 fz_free(ctx, fontdesc);
626 }
627
628 pdf_font_desc *
629 pdf_new_font_desc(fz_context *ctx)
630 {
631 pdf_font_desc *fontdesc;
632
633 fontdesc = fz_malloc_struct(ctx, pdf_font_desc);
634 FZ_INIT_AWKWARD_STORABLE(fontdesc, 1, pdf_drop_font_imp, pdf_font_is_droppable);
635 fontdesc->size = sizeof(pdf_font_desc);
636
637 fontdesc->font = NULL;
638
639 fontdesc->flags = 0;
640 fontdesc->italic_angle = 0;
641 fontdesc->ascent = 800;
642 fontdesc->descent = -200;
643 fontdesc->cap_height = 800;
644 fontdesc->x_height = 500;
645 fontdesc->missing_width = 0;
646
647 fontdesc->encoding = NULL;
648 fontdesc->to_ttf_cmap = NULL;
649 fontdesc->cid_to_gid_len = 0;
650 fontdesc->cid_to_gid = NULL;
651
652 fontdesc->to_unicode = NULL;
653 fontdesc->cid_to_ucs_len = 0;
654 fontdesc->cid_to_ucs = NULL;
655
656 fontdesc->wmode = 0;
657
658 fontdesc->hmtx_cap = 0;
659 fontdesc->vmtx_cap = 0;
660 fontdesc->hmtx_len = 0;
661 fontdesc->vmtx_len = 0;
662 fontdesc->hmtx = NULL;
663 fontdesc->vmtx = NULL;
664
665 fontdesc->dhmtx.lo = 0x0000;
666 fontdesc->dhmtx.hi = 0xFFFF;
667 fontdesc->dhmtx.w = 1000;
668
669 fontdesc->dvmtx.lo = 0x0000;
670 fontdesc->dvmtx.hi = 0xFFFF;
671 fontdesc->dvmtx.x = 0;
672 fontdesc->dvmtx.y = 880;
673 fontdesc->dvmtx.w = -1000;
674
675 fontdesc->is_embedded = 0;
676
677 return fontdesc;
678 }
679
680 /*
681 * Simple fonts (Type1 and TrueType)
682 */
683
684 static FT_CharMap
685 select_type1_cmap(FT_Face face)
686 {
687 int i;
688 for (i = 0; i < face->num_charmaps; i++)
689 if (face->charmaps[i]->platform_id == 7)
690 return face->charmaps[i];
691 if (face->num_charmaps > 0)
692 return face->charmaps[0];
693 return NULL;
694 }
695
696 static FT_CharMap
697 select_truetype_cmap(fz_context *ctx, FT_Face face, int symbolic)
698 {
699 int i;
700
701 /* First look for a Microsoft symbolic cmap, if applicable */
702 if (symbolic)
703 {
704 for (i = 0; i < face->num_charmaps; i++)
705 if (face->charmaps[i]->platform_id == 3 && face->charmaps[i]->encoding_id == 0)
706 return face->charmaps[i];
707 }
708
709 fz_ft_lock(ctx);
710
711 /* Then look for a Microsoft Unicode cmap */
712 for (i = 0; i < face->num_charmaps; i++)
713 if (face->charmaps[i]->platform_id == 3 && face->charmaps[i]->encoding_id == 1)
714 if (FT_Get_CMap_Format(face->charmaps[i]) != -1)
715 {
716 fz_ft_unlock(ctx);
717 return face->charmaps[i];
718 }
719
720 /* Finally look for an Apple MacRoman cmap */
721 for (i = 0; i < face->num_charmaps; i++)
722 if (face->charmaps[i]->platform_id == 1 && face->charmaps[i]->encoding_id == 0)
723 if (FT_Get_CMap_Format(face->charmaps[i]) != -1)
724 {
725 fz_ft_unlock(ctx);
726 return face->charmaps[i];
727 }
728
729 if (face->num_charmaps > 0)
730 if (FT_Get_CMap_Format(face->charmaps[0]) != -1)
731 {
732 fz_ft_unlock(ctx);
733 return face->charmaps[0];
734 }
735
736 fz_ft_unlock(ctx);
737 return NULL;
738 }
739
740 static FT_CharMap
741 select_unknown_cmap(FT_Face face)
742 {
743 if (face->num_charmaps > 0)
744 return face->charmaps[0];
745 return NULL;
746 }
747
748 static int use_s22pdf_workaround(fz_context *ctx, pdf_obj *dict, pdf_obj *descriptor)
749 {
750 if (descriptor)
751 {
752 if (pdf_dict_get(ctx, dict, PDF_NAME(Encoding)) != PDF_NAME(WinAnsiEncoding))
753 return 0;
754 if (pdf_dict_get_int(ctx, descriptor, PDF_NAME(Flags)) != 4)
755 return 0;
756 return 1;
757 }
758 return 0;
759 }
760
/*
 * Load a simple font (one with single-byte character codes) from the
 * font dictionary 'dict' and return a new font descriptor.
 *
 * The steps, in order:
 *   1. Load the font program, either through the FontDescriptor or, if
 *      there is none, as a builtin font named by BaseFont.
 *   2. If the S22PDF workaround applies and BaseFont is one of the
 *      listed names, reload the font as an Adobe-GB1 font with system
 *      cmaps and skip straight to the widths.
 *   3. Select and set a FreeType charmap according to the font kind.
 *   4. Build 'estrings' (glyph name per code) from Encoding, then build
 *      'etable' (glyph index per code) from the face's charmap and the
 *      glyph names.
 *   5. Install etable as cid_to_gid and load the ToUnicode mapping.
 *   6. Load widths from the Widths array, or from FreeType if absent.
 *
 * On any error the FT lock is released if it is held, etable is freed
 * unless fontdesc already owns it, the descriptor is dropped and the
 * error is rethrown.
 */
761 static pdf_font_desc *
762 pdf_load_simple_font(fz_context *ctx, pdf_document *doc, pdf_obj *dict)
763 {
764 const char *basefont;
765 pdf_obj *descriptor;
766 pdf_obj *encoding;
767 pdf_obj *widths;
768 unsigned short *etable = NULL;
769 pdf_font_desc *fontdesc = NULL;
770 pdf_obj *subtype;
771 FT_Face face;
772 FT_CharMap cmap;
773 int symbolic;
774 int kind;
775 int glyph;
776
/* estrings[i] is the glyph name for code i (or NULL); ebuffer provides
 * storage for names fetched from FreeType with FT_Get_Glyph_Name. */
777 const char *estrings[256];
778 char ebuffer[256][32];
779 int i, k, n;
780 int fterr;
/* Tracks whether this function currently holds the FT lock, so that the
 * catch block can release it. */
781 int has_lock = 0;
782
783 fz_var(fontdesc);
784 fz_var(etable);
785 fz_var(has_lock);
786
787 /* Load font file */
788 fz_try(ctx)
789 {
790 fontdesc = pdf_new_font_desc(ctx);
791
792 basefont = pdf_dict_get_name(ctx, dict, PDF_NAME(BaseFont));
793
794 descriptor = pdf_dict_get(ctx, dict, PDF_NAME(FontDescriptor));
795 if (descriptor)
796 pdf_load_font_descriptor(ctx, doc, fontdesc, descriptor, NULL, basefont, 0);
797 else
798 pdf_load_builtin_font(ctx, fontdesc, basefont, 0);
799
800 /* Some chinese documents mistakenly consider WinAnsiEncoding to be codepage 936 */
801 if (use_s22pdf_workaround(ctx, dict, descriptor))
802 {
/* Pairs of (BaseFont name as raw bytes, replacement font name),
 * terminated by NULL. */
803 char *cp936fonts[] = {
804 "\xCB\xCE\xCC\xE5", "SimSun,Regular",
805 "\xBA\xDA\xCC\xE5", "SimHei,Regular",
806 "\xBF\xAC\xCC\xE5_GB2312", "SimKai,Regular",
807 "\xB7\xC2\xCB\xCE_GB2312", "SimFang,Regular",
808 "\xC1\xA5\xCA\xE9", "SimLi,Regular",
809 NULL
810 };
811 for (i = 0; cp936fonts[i]; i += 2)
812 if (!strcmp(basefont, cp936fonts[i]))
813 break;
814 if (cp936fonts[i])
815 {
816 fz_warn(ctx, "workaround for S22PDF lying about chinese font encodings");
/* Start over with a fresh descriptor; it is set to NULL in between so
 * the catch block never sees the dropped one. */
817 pdf_drop_font(ctx, fontdesc);
818 fontdesc = NULL;
819 fontdesc = pdf_new_font_desc(ctx);
820 pdf_load_font_descriptor(ctx, doc, fontdesc, descriptor, "Adobe-GB1", cp936fonts[i+1], 0);
821 fontdesc->encoding = pdf_load_system_cmap(ctx, "GBK-EUC-H");
822 fontdesc->to_unicode = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2");
823 fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2");
824
825 goto skip_encoding;
826 }
827 }
828
829 face = fontdesc->font->ft_face;
830 kind = ft_kind(ctx, face);
831
832 /* Encoding */
833
834 symbolic = fontdesc->flags & 4;
835 /* Bug 703273: If non-symbolic, we're not symbolic. */
836 if (fontdesc->flags & 32)
837 symbolic = 0;
838
839 if (kind == TYPE1)
840 cmap = select_type1_cmap(face);
841 else if (kind == TRUETYPE)
842 cmap = select_truetype_cmap(ctx, face, symbolic);
843 else
844 cmap = select_unknown_cmap(face);
845
846 if (cmap)
847 {
848 fz_ft_lock(ctx);
849 fterr = FT_Set_Charmap(face, cmap);
850 fz_ft_unlock(ctx);
851 if (fterr)
852 fz_warn(ctx, "freetype could not set cmap: %s", ft_error_string(fterr));
853 }
854 else
855 fz_warn(ctx, "freetype could not find any cmaps");
856
857 /* FIXME: etable may leak on error. */
858 etable = Memento_label(fz_malloc_array(ctx, 256, unsigned short), "cid_to_gid");
859 fontdesc->size += 256 * sizeof(unsigned short);
860 for (i = 0; i < 256; i++)
861 {
862 estrings[i] = NULL;
863 etable[i] = 0;
864 }
865
866 encoding = pdf_dict_get(ctx, dict, PDF_NAME(Encoding));
867 if (encoding)
868 {
869 if (pdf_is_name(ctx, encoding))
870 pdf_load_encoding(estrings, pdf_to_name(ctx, encoding));
871
872 if (pdf_is_dict(ctx, encoding))
873 {
874 pdf_obj *base, *diff, *item;
875
876 base = pdf_dict_get(ctx, encoding, PDF_NAME(BaseEncoding));
877 if (pdf_is_name(ctx, base))
878 pdf_load_encoding(estrings, pdf_to_name(ctx, base));
879 else if (!fontdesc->is_embedded && !symbolic)
880 pdf_load_encoding(estrings, "StandardEncoding");
881
882 diff = pdf_dict_get(ctx, encoding, PDF_NAME(Differences));
883 if (pdf_is_array(ctx, diff))
884 {
885 n = pdf_array_len(ctx, diff);
886 k = 0;
/* An integer item sets the current code k; each name item is stored at
 * code k, which then advances. Names for codes outside 0..255 are
 * ignored. */
887 for (i = 0; i < n; i++)
888 {
889 item = pdf_array_get(ctx, diff, i);
890 if (pdf_is_int(ctx, item))
891 k = pdf_to_int(ctx, item);
892 if (pdf_is_name(ctx, item) && k >= 0 && k < (int)nelem(estrings))
893 estrings[k++] = pdf_to_name(ctx, item);
894 }
895 }
896 }
897 }
898 else if (!fontdesc->is_embedded && !symbolic)
899 pdf_load_encoding(estrings, "StandardEncoding");
900
/* The FT lock is held from here until the matching unlock below. */
901 fz_ft_lock(ctx);
902 has_lock = 1;
903
904 /* start with the builtin encoding */
905 for (i = 0; i < 256; i++)
906 etable[i] = ft_char_index(face, i);
907
908 /* built-in and substitute fonts may be a different type than what the document expects */
909 subtype = pdf_dict_get(ctx, dict, PDF_NAME(Subtype));
910 if (pdf_name_eq(ctx, subtype, PDF_NAME(Type1)))
911 kind = TYPE1;
912 else if (pdf_name_eq(ctx, subtype, PDF_NAME(MMType1)))
913 kind = TYPE1;
914 else if (pdf_name_eq(ctx, subtype, PDF_NAME(TrueType)))
915 kind = TRUETYPE;
916 else if (pdf_name_eq(ctx, subtype, PDF_NAME(CIDFontType0)))
917 kind = TYPE1;
918 else if (pdf_name_eq(ctx, subtype, PDF_NAME(CIDFontType2)))
919 kind = TRUETYPE;
920
921 /* encode by glyph name where we can */
922 if (kind == TYPE1)
923 {
924 for (i = 0; i < 256; i++)
925 {
926 if (estrings[i])
927 {
928 glyph = ft_name_index(face, estrings[i]);
929 if (glyph > 0)
930 etable[i] = glyph;
931 }
932 }
933 }
934
935 /* encode by glyph name where we can */
936 if (kind == TRUETYPE)
937 {
938 /* Unicode cmap */
939 if (!symbolic && face->charmap && face->charmap->platform_id == 3)
940 {
941 for (i = 0; i < 256; i++)
942 {
943 if (estrings[i])
944 {
945 glyph = ft_find_glyph_by_unicode_name(face, estrings[i]);
946 if (glyph > 0)
947 etable[i] = glyph;
948 }
949 }
950 }
951
952 /* MacRoman cmap */
953 else if (!symbolic && face->charmap && face->charmap->platform_id == 1)
954 {
955 for (i = 0; i < 256; i++)
956 {
957 if (estrings[i])
958 {
/* Try the MacRoman code for the name first, then the glyph name itself. */
959 int mrcode = lookup_mre_code(estrings[i]);
960 glyph = 0;
961 if (mrcode > 0)
962 glyph = ft_char_index(face, mrcode);
963 if (glyph == 0)
964 glyph = ft_name_index(face, estrings[i]);
965 if (glyph > 0)
966 etable[i] = glyph;
967 }
968 }
969 }
970
971 /* Symbolic cmap */
972 else if (!face->charmap || face->charmap->encoding != FT_ENCODING_MS_SYMBOL)
973 {
974 for (i = 0; i < 256; i++)
975 {
976 if (estrings[i])
977 {
978 glyph = ft_name_index(face, estrings[i]);
979 if (glyph > 0)
980 etable[i] = glyph;
981 }
982 }
983 }
984 }
985
986 /* try to reverse the glyph names from the builtin encoding */
987 for (i = 0; i < 256; i++)
988 {
989 if (etable[i] && !estrings[i])
990 {
991 if (FT_HAS_GLYPH_NAMES(face))
992 {
993 fterr = FT_Get_Glyph_Name(face, etable[i], ebuffer[i], 32);
994 if (fterr)
995 fz_warn(ctx, "freetype get glyph name (gid %d): %s", etable[i], ft_error_string(fterr));
996 if (ebuffer[i][0])
997 estrings[i] = ebuffer[i];
998 }
999 else
1000 {
1001 estrings[i] = (char*) fz_glyph_name_from_win_ansi[i]; /* discard const */
1002 }
1003 }
1004 }
1005
1006 /* symbolic Type 1 fonts with an implicit encoding and non-standard glyph names */
1007 if (kind == TYPE1 && symbolic)
1008 {
1009 for (i = 0; i < 256; i++)
1010 if (etable[i] && estrings[i] && !fz_unicode_from_glyph_name(estrings[i]))
1011 estrings[i] = (char*) fz_glyph_name_from_adobe_standard[i];
1012 }
1013
1014 fz_ft_unlock(ctx);
1015 has_lock = 0;
1016
1017 fontdesc->encoding = pdf_new_identity_cmap(ctx, 0, 1);
1018 fontdesc->size += pdf_cmap_size(ctx, fontdesc->encoding);
/* From here on fontdesc owns etable; the catch block checks for this
 * before freeing it. */
1019 fontdesc->cid_to_gid_len = 256;
1020 fontdesc->cid_to_gid = etable;
1021
/* A ToUnicode failure is rethrown only for TRYLATER and SYSTEM errors;
 * anything else is reported and turned into a warning. */
1022 fz_try(ctx)
1023 {
1024 pdf_load_to_unicode(ctx, doc, fontdesc, estrings, NULL, pdf_dict_get(ctx, dict, PDF_NAME(ToUnicode)));
1025 }
1026 fz_catch(ctx)
1027 {
1028 fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
1029 fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
1030 fz_report_error(ctx);
1031 fz_warn(ctx, "cannot load ToUnicode CMap");
1032 }
1033
1034 skip_encoding:
1035
1036 /* Widths */
1037
1038 pdf_set_default_hmtx(ctx, fontdesc, fontdesc->missing_width);
1039
1040 widths = pdf_dict_get(ctx, dict, PDF_NAME(Widths));
1041 if (widths)
1042 {
1043 int first, last;
1044
1045 first = pdf_dict_get_int(ctx, dict, PDF_NAME(FirstChar));
1046 last = pdf_dict_get_int(ctx, dict, PDF_NAME(LastChar));
1047
/* An invalid FirstChar/LastChar range collapses to the single code 0. */
1048 if (first < 0 || last > 255 || first > last)
1049 first = last = 0;
1050
1051 for (i = 0; i < last - first + 1; i++)
1052 {
1053 int wid = pdf_array_get_int(ctx, widths, i);
1054 pdf_add_hmtx(ctx, fontdesc, i + first, i + first, wid);
1055 }
1056 }
1057 else
1058 {
/* No Widths array: take the advance of every code from FreeType. */
1059 fz_ft_lock(ctx);
1060 has_lock = 1;
1061 for (i = 0; i < 256; i++)
1062 pdf_add_hmtx(ctx, fontdesc, i, i, ft_width(ctx, fontdesc, i));
1063 fz_ft_unlock(ctx);
1064 has_lock = 0;
1065 }
1066
1067 pdf_end_hmtx(ctx, fontdesc);
1068 }
1069 fz_catch(ctx)
1070 {
1071 if (has_lock)
1072 fz_ft_unlock(ctx);
/* Free etable here only if it was not handed over to fontdesc, which
 * releases cid_to_gid itself when dropped. */
1073 if (fontdesc && etable != fontdesc->cid_to_gid)
1074 fz_free(ctx, etable);
1075 pdf_drop_font(ctx, fontdesc);
1076 fz_rethrow(ctx);
1077 }
1078 return fontdesc;
1079 }
1080
1081 static int
1082 hail_mary_make_hash_key(fz_context *ctx, fz_store_hash *hash, void *key_)
1083 {
1084 hash->u.pi.i = 0;
1085 hash->u.pi.ptr = NULL;
1086 return 1;
1087 }
1088
1089 static void *
1090 hail_mary_keep_key(fz_context *ctx, void *key)
1091 {
1092 return key;
1093 }
1094
1095 static void
1096 hail_mary_drop_key(fz_context *ctx, void *key)
1097 {
1098 }
1099
1100 static int
1101 hail_mary_cmp_key(fz_context *ctx, void *k0, void *k1)
1102 {
1103 return k0 == k1;
1104 }
1105
1106 static void
1107 hail_mary_format_key(fz_context *ctx, char *s, size_t n, void *key_)
1108 {
1109 fz_strlcpy(s, "(hail mary font)", n);
1110 }
1111
1112 static int hail_mary_store_key; /* Dummy */
1113
1114 static const fz_store_type hail_mary_store_type =
1115 {
1116 "hail-mary",
1117 hail_mary_make_hash_key,
1118 hail_mary_keep_key,
1119 hail_mary_drop_key,
1120 hail_mary_cmp_key,
1121 hail_mary_format_key,
1122 NULL
1123 };
1124
1125 pdf_font_desc *
1126 pdf_load_hail_mary_font(fz_context *ctx, pdf_document *doc)
1127 {
1128 pdf_font_desc *fontdesc;
1129 pdf_font_desc *existing;
1130
1131 if ((fontdesc = fz_find_item(ctx, pdf_drop_font_imp, &hail_mary_store_key, &hail_mary_store_type)) != NULL)
1132 {
1133 return fontdesc;
1134 }
1135
1136 /* FIXME: Get someone with a clue about fonts to fix this */
1137 fontdesc = pdf_load_simple_font(ctx, doc, NULL);
1138
1139 existing = fz_store_item(ctx, &hail_mary_store_key, fontdesc, fontdesc->size, &hail_mary_store_type);
1140 assert(existing == NULL);
1141 (void)existing; /* Silence warning in release builds */
1142
1143 return fontdesc;
1144 }
1145
1146 /*
1147 * CID Fonts
1148 */
1149
1150 static pdf_font_desc *
1151 load_cid_font(fz_context *ctx, pdf_document *doc, pdf_obj *dict, pdf_obj *encoding, pdf_obj *to_unicode)
1152 {
1153 pdf_obj *widths;
1154 pdf_obj *descriptor;
1155 pdf_font_desc *fontdesc = NULL;
1156 fz_buffer *buf = NULL;
1157 pdf_cmap *cmap;
1158 FT_Face face;
1159 char collection[256];
1160 const char *basefont;
1161 int i, k, fterr;
1162 pdf_obj *cidtogidmap;
1163 pdf_obj *obj;
1164 int dw;
1165
1166 fz_var(fontdesc);
1167 fz_var(buf);
1168
1169 fz_try(ctx)
1170 {
1171 /* Get font name and CID collection */
1172
1173 basefont = pdf_dict_get_name(ctx, dict, PDF_NAME(BaseFont));
1174
1175 {
1176 pdf_obj *cidinfo;
1177 const char *reg, *ord;
1178
1179 cidinfo = pdf_dict_get(ctx, dict, PDF_NAME(CIDSystemInfo));
1180 if (cidinfo)
1181 {
1182 reg = pdf_dict_get_string(ctx, cidinfo, PDF_NAME(Registry), NULL);
1183 ord = pdf_dict_get_string(ctx, cidinfo, PDF_NAME(Ordering), NULL);
1184 fz_snprintf(collection, sizeof collection, "%s-%s", reg, ord);
1185 }
1186 else
1187 {
1188 fz_warn(ctx, "CIDFont is missing CIDSystemInfo dictionary; assuming Adobe-Identity");
1189 fz_strlcpy(collection, "Adobe-Identity", sizeof collection);
1190 }
1191 }
1192
1193 /* Encoding */
1194
1195 if (pdf_is_name(ctx, encoding))
1196 {
1197 cmap = pdf_load_system_cmap(ctx, pdf_to_name(ctx, encoding));
1198 }
1199 else if (pdf_is_indirect(ctx, encoding))
1200 {
1201 cmap = pdf_load_embedded_cmap(ctx, doc, encoding);
1202 }
1203 else
1204 {
1205 fz_throw(ctx, FZ_ERROR_SYNTAX, "font missing encoding");
1206 }
1207
1208 /* Load font file */
1209
1210 fontdesc = pdf_new_font_desc(ctx);
1211
1212 fontdesc->encoding = cmap;
1213 fontdesc->size += pdf_cmap_size(ctx, fontdesc->encoding);
1214
1215 pdf_set_font_wmode(ctx, fontdesc, pdf_cmap_wmode(ctx, fontdesc->encoding));
1216
1217 descriptor = pdf_dict_get(ctx, dict, PDF_NAME(FontDescriptor));
1218 if (!descriptor)
1219 fz_throw(ctx, FZ_ERROR_SYNTAX, "missing font descriptor");
1220 pdf_load_font_descriptor(ctx, doc, fontdesc, descriptor, collection, basefont, 1);
1221
1222 face = fontdesc->font->ft_face;
1223
1224 /* Apply encoding */
1225
1226 cidtogidmap = pdf_dict_get(ctx, dict, PDF_NAME(CIDToGIDMap));
1227 if (pdf_is_stream(ctx, cidtogidmap))
1228 {
1229 size_t z, len;
1230 unsigned char *data;
1231
1232 buf = pdf_load_stream(ctx, cidtogidmap);
1233
1234 len = fz_buffer_storage(ctx, buf, &data);
1235 fontdesc->cid_to_gid_len = len / 2;
1236 fontdesc->cid_to_gid = Memento_label(fz_malloc_array(ctx, fontdesc->cid_to_gid_len, unsigned short), "cid_to_gid_map");
1237 fontdesc->size += fontdesc->cid_to_gid_len * sizeof(unsigned short);
1238 for (z = 0; z < fontdesc->cid_to_gid_len; z++)
1239 fontdesc->cid_to_gid[z] = (data[z * 2] << 8) + data[z * 2 + 1];
1240 }
1241 else if (cidtogidmap && !pdf_name_eq(ctx, PDF_NAME(Identity), cidtogidmap))
1242 {
1243 fz_warn(ctx, "ignoring unknown CIDToGIDMap entry");
1244 }
1245
1246 /* if font is external, cidtogidmap should not be identity */
1247 /* so we map from cid to unicode and then map that through the (3 1) */
1248 /* unicode cmap to get a glyph id */
1249 else if (fontdesc->font->flags.ft_substitute)
1250 {
1251 fz_ft_lock(ctx);
1252 fterr = FT_Select_Charmap(face, ft_encoding_unicode);
1253 fz_ft_unlock(ctx);
1254 if (fterr)
1255 fz_throw(ctx, FZ_ERROR_SYNTAX, "no unicode cmap when emulating CID font: %s", ft_error_string(fterr));
1256
1257 if (!strcmp(collection, "Adobe-CNS1"))
1258 fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-CNS1-UCS2");
1259 else if (!strcmp(collection, "Adobe-GB1"))
1260 fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2");
1261 else if (!strcmp(collection, "Adobe-Japan1"))
1262 fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Japan1-UCS2");
1263 else if (!strcmp(collection, "Adobe-Japan2"))
1264 fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Japan2-UCS2");
1265 else if (!strcmp(collection, "Adobe-Korea1"))
1266 fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Korea1-UCS2");
1267 }
1268
1269 pdf_load_to_unicode(ctx, doc, fontdesc, NULL, collection, to_unicode);
1270
1271 /* If we have an identity encoding, we're supposed to use the glyph ids directly.
1272 * If we only have a substitute font, that won't work.
1273 * Make a last ditch attempt by using
1274 * the ToUnicode table if it exists to map via the substitute font's cmap. */
1275 if (strstr(fontdesc->encoding->cmap_name, "Identity-") && fontdesc->font->flags.ft_substitute)
1276 {
1277 if (!fontdesc->to_ttf_cmap)
1278 {
1279 if (fontdesc->to_unicode)
1280 {
1281 // Use ToUnicode from PDF file if possible.
1282 fontdesc->to_ttf_cmap = pdf_keep_cmap(ctx, fontdesc->to_unicode);
1283 }
1284 else
1285 {
1286 // Attempt a generic ToUnicode (default MacRoman ordering for TrueType)
1287 fontdesc->to_ttf_cmap = pdf_load_builtin_cmap(ctx, "TrueType-UCS2");
1288 }
1289 }
1290
1291 if (fontdesc->to_ttf_cmap)
1292 {
1293 fz_warn(ctx, "non-embedded font using identity encoding: %s (mapping via %s)", basefont, fontdesc->to_ttf_cmap->cmap_name);
1294 if (!fontdesc->to_unicode)
1295 fontdesc->to_unicode = pdf_keep_cmap(ctx, fontdesc->to_ttf_cmap);
1296 }
1297 else
1298 fz_warn(ctx, "non-embedded font using identity encoding: %s", basefont);
1299 }
1300
1301 /* Horizontal */
1302
1303 dw = pdf_dict_get_int_default(ctx, dict, PDF_NAME(DW), 1000);
1304 pdf_set_default_hmtx(ctx, fontdesc, dw);
1305
1306 widths = pdf_dict_get(ctx, dict, PDF_NAME(W));
1307 if (widths)
1308 {
1309 int c0, c1, w, n, m;
1310
1311 n = pdf_array_len(ctx, widths);
1312 for (i = 0; i < n; )
1313 {
1314 c0 = pdf_array_get_int(ctx, widths, i);
1315 obj = pdf_array_get(ctx, widths, i + 1);
1316 if (pdf_is_array(ctx, obj))
1317 {
1318 m = pdf_array_len(ctx, obj);
1319 for (k = 0; k < m; k++)
1320 {
1321 w = pdf_array_get_int(ctx, obj, k);
1322 pdf_add_hmtx(ctx, fontdesc, c0 + k, c0 + k, w);
1323 }
1324 i += 2;
1325 }
1326 else
1327 {
1328 c1 = pdf_to_int(ctx, obj);
1329 w = pdf_array_get_int(ctx, widths, i + 2);
1330 pdf_add_hmtx(ctx, fontdesc, c0, c1, w);
1331 i += 3;
1332 }
1333 }
1334 }
1335
1336 pdf_end_hmtx(ctx, fontdesc);
1337
1338 /* Vertical */
1339
1340 if (pdf_cmap_wmode(ctx, fontdesc->encoding) == 1)
1341 {
1342 int dw2y = 880;
1343 int dw2w = -1000;
1344
1345 obj = pdf_dict_get(ctx, dict, PDF_NAME(DW2));
1346 if (obj)
1347 {
1348 dw2y = pdf_array_get_int(ctx, obj, 0);
1349 dw2w = pdf_array_get_int(ctx, obj, 1);
1350 }
1351
1352 pdf_set_default_vmtx(ctx, fontdesc, dw2y, dw2w);
1353
1354 widths = pdf_dict_get(ctx, dict, PDF_NAME(W2));
1355 if (widths)
1356 {
1357 int c0, c1, w, x, y, n;
1358
1359 n = pdf_array_len(ctx, widths);
1360 for (i = 0; i < n; )
1361 {
1362 c0 = pdf_array_get_int(ctx, widths, i);
1363 obj = pdf_array_get(ctx, widths, i + 1);
1364 if (pdf_is_array(ctx, obj))
1365 {
1366 int m = pdf_array_len(ctx, obj);
1367 for (k = 0; k * 3 < m; k ++)
1368 {
1369 w = pdf_array_get_int(ctx, obj, k * 3 + 0);
1370 x = pdf_array_get_int(ctx, obj, k * 3 + 1);
1371 y = pdf_array_get_int(ctx, obj, k * 3 + 2);
1372 pdf_add_vmtx(ctx, fontdesc, c0 + k, c0 + k, x, y, w);
1373 }
1374 i += 2;
1375 }
1376 else
1377 {
1378 c1 = pdf_to_int(ctx, obj);
1379 w = pdf_array_get_int(ctx, widths, i + 2);
1380 x = pdf_array_get_int(ctx, widths, i + 3);
1381 y = pdf_array_get_int(ctx, widths, i + 4);
1382 pdf_add_vmtx(ctx, fontdesc, c0, c1, x, y, w);
1383 i += 5;
1384 }
1385 }
1386 }
1387
1388 pdf_end_vmtx(ctx, fontdesc);
1389 }
1390 }
1391 fz_always(ctx)
1392 fz_drop_buffer(ctx, buf);
1393 fz_catch(ctx)
1394 {
1395 pdf_drop_font(ctx, fontdesc);
1396 fz_rethrow(ctx);
1397 }
1398
1399 return fontdesc;
1400 }
1401
1402 static pdf_font_desc *
1403 pdf_load_type0_font(fz_context *ctx, pdf_document *doc, pdf_obj *dict)
1404 {
1405 pdf_obj *dfonts;
1406 pdf_obj *dfont;
1407 pdf_obj *subtype;
1408 pdf_obj *encoding;
1409 pdf_obj *to_unicode;
1410
1411 dfonts = pdf_dict_get(ctx, dict, PDF_NAME(DescendantFonts));
1412 if (!dfonts)
1413 fz_throw(ctx, FZ_ERROR_SYNTAX, "cid font is missing descendant fonts");
1414
1415 dfont = pdf_array_get(ctx, dfonts, 0);
1416
1417 subtype = pdf_dict_get(ctx, dfont, PDF_NAME(Subtype));
1418 encoding = pdf_dict_get(ctx, dict, PDF_NAME(Encoding));
1419 to_unicode = pdf_dict_get(ctx, dict, PDF_NAME(ToUnicode));
1420
1421 if (pdf_is_name(ctx, subtype) && pdf_name_eq(ctx, subtype, PDF_NAME(CIDFontType0)))
1422 return load_cid_font(ctx, doc, dfont, encoding, to_unicode);
1423 if (pdf_is_name(ctx, subtype) && pdf_name_eq(ctx, subtype, PDF_NAME(CIDFontType2)))
1424 return load_cid_font(ctx, doc, dfont, encoding, to_unicode);
1425 fz_throw(ctx, FZ_ERROR_SYNTAX, "unknown cid font type");
1426 }
1427
1428 /*
1429 * FontDescriptor
1430 */
1431
1432 static void
1433 pdf_load_font_descriptor(fz_context *ctx, pdf_document *doc, pdf_font_desc *fontdesc, pdf_obj *dict,
1434 const char *collection, const char *basefont, int iscidfont)
1435 {
1436 pdf_obj *obj1, *obj2, *obj3, *obj;
1437 const char *fontname;
1438 FT_Face face;
1439
1440 /* Prefer BaseFont; don't bother with FontName */
1441 fontname = basefont;
1442
1443 fontdesc->flags = pdf_dict_get_int(ctx, dict, PDF_NAME(Flags));
1444 fontdesc->italic_angle = pdf_dict_get_real(ctx, dict, PDF_NAME(ItalicAngle));
1445 /* fontdesc->ascent and descent have already been set to sane defaults */
1446 fontdesc->cap_height = pdf_dict_get_real(ctx, dict, PDF_NAME(CapHeight));
1447 fontdesc->x_height = pdf_dict_get_real(ctx, dict, PDF_NAME(XHeight));
1448 fontdesc->missing_width = pdf_dict_get_real(ctx, dict, PDF_NAME(MissingWidth));
1449
1450 obj1 = pdf_dict_get(ctx, dict, PDF_NAME(FontFile));
1451 obj2 = pdf_dict_get(ctx, dict, PDF_NAME(FontFile2));
1452 obj3 = pdf_dict_get(ctx, dict, PDF_NAME(FontFile3));
1453 obj = obj1 ? obj1 : obj2 ? obj2 : obj3;
1454
1455 if (pdf_is_indirect(ctx, obj))
1456 {
1457 fz_try(ctx)
1458 {
1459 pdf_load_embedded_font(ctx, doc, fontdesc, fontname, obj);
1460 }
1461 fz_catch(ctx)
1462 {
1463 fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
1464 fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
1465 fz_report_error(ctx);
1466 fz_warn(ctx, "ignored error when loading embedded font; attempting to load system font");
1467 if (!iscidfont && fontname != pdf_clean_font_name(fontname))
1468 pdf_load_builtin_font(ctx, fontdesc, fontname, 1);
1469 else
1470 pdf_load_system_font(ctx, fontdesc, fontname, collection);
1471 }
1472 }
1473 else
1474 {
1475 if (!iscidfont && fontname != pdf_clean_font_name(fontname))
1476 pdf_load_builtin_font(ctx, fontdesc, fontname, 1);
1477 else
1478 pdf_load_system_font(ctx, fontdesc, fontname, collection);
1479 }
1480
1481 /* Check for DynaLab fonts that must use hinting */
1482 face = fontdesc->font->ft_face;
1483 if (ft_kind(ctx, face) == TRUETYPE)
1484 {
1485 /* FreeType's own 'tricky' font detection needs a bit of help */
1486 if (is_dynalab(fontdesc->font->name))
1487 face->face_flags |= FT_FACE_FLAG_TRICKY;
1488
1489 fontdesc->ascent = 1000.0f * face->ascender / face->units_per_EM;
1490
1491 fontdesc->descent = 1000.0f * face->descender / face->units_per_EM;
1492 }
1493
1494 /* Prefer FontDescriptor Ascent/Descent values to embedded font's */
1495 fontdesc->ascent = pdf_dict_get_real_default(ctx, dict, PDF_NAME(Ascent), fontdesc->ascent);
1496 fontdesc->descent = pdf_dict_get_real_default(ctx, dict, PDF_NAME(Descent), fontdesc->descent);
1497 /* Allow for naughty producers that give us a positive descent. */
1498 if (fontdesc->descent > 0)
1499 fontdesc->descent = -fontdesc->descent;
1500
1501 if (fontdesc->ascent <= 0 || fontdesc->ascent > FZ_MAX_TRUSTWORTHY_ASCENT * 1000 ||
1502 fontdesc->descent < FZ_MAX_TRUSTWORTHY_DESCENT * 1000)
1503 {
1504 fz_warn(ctx, "bogus font ascent/descent values (%g / %g)", fontdesc->ascent, fontdesc->descent);
1505 fontdesc->font->ascender = 0.8f;
1506 fontdesc->font->descender = -0.2f;
1507 fontdesc->font->ascdesc_src = FZ_ASCDESC_DEFAULT;
1508 }
1509 else
1510 {
1511 fontdesc->font->ascender = fontdesc->ascent / 1000.0f;
1512 fontdesc->font->descender = fontdesc->descent / 1000.0f;
1513 fontdesc->font->ascdesc_src = FZ_ASCDESC_FROM_FONT;
1514 }
1515 }
1516
1517 static void
1518 pdf_make_width_table(fz_context *ctx, pdf_font_desc *fontdesc)
1519 {
1520 fz_font *font = fontdesc->font;
1521 int i, k, n, cid, gid;
1522
1523 n = 0;
1524 for (i = 0; i < fontdesc->hmtx_len; i++)
1525 {
1526 for (k = fontdesc->hmtx[i].lo; k <= fontdesc->hmtx[i].hi; k++)
1527 {
1528 cid = pdf_lookup_cmap(fontdesc->encoding, k);
1529 gid = pdf_font_cid_to_gid(ctx, fontdesc, cid);
1530 if (gid > n)
1531 n = gid;
1532 }
1533 }
1534
1535 font->width_count = n + 1;
1536 font->width_table = Memento_label(fz_malloc_array(ctx, font->width_count, short), "font_widths");
1537 fontdesc->size += font->width_count * sizeof(short);
1538
1539 font->width_default = fontdesc->dhmtx.w;
1540 for (i = 0; i < font->width_count; i++)
1541 font->width_table[i] = -1;
1542
1543 for (i = 0; i < fontdesc->hmtx_len; i++)
1544 {
1545 for (k = fontdesc->hmtx[i].lo; k <= fontdesc->hmtx[i].hi; k++)
1546 {
1547 cid = pdf_lookup_cmap(fontdesc->encoding, k);
1548 gid = pdf_font_cid_to_gid(ctx, fontdesc, cid);
1549 if (gid >= 0 && gid < font->width_count)
1550 font->width_table[gid] = fz_maxi(fontdesc->hmtx[i].w, font->width_table[gid]);
1551 }
1552 }
1553
1554 for (i = 0; i < font->width_count; i++)
1555 if (font->width_table[i] == -1)
1556 font->width_table[i] = font->width_default;
1557 }
1558
1559 pdf_font_desc *
1560 pdf_load_font(fz_context *ctx, pdf_document *doc, pdf_obj *rdb, pdf_obj *dict)
1561 {
1562 pdf_obj *subtype;
1563 pdf_obj *dfonts;
1564 pdf_obj *charprocs;
1565 pdf_font_desc *fontdesc = NULL;
1566 int type3 = 0;
1567
1568 if ((fontdesc = pdf_find_item(ctx, pdf_drop_font_imp, dict)) != NULL)
1569 {
1570 if (fontdesc->t3loading)
1571 {
1572 pdf_drop_font(ctx, fontdesc);
1573 fz_throw(ctx, FZ_ERROR_SYNTAX, "recursive type3 font");
1574 }
1575 return fontdesc;
1576 }
1577
1578 subtype = pdf_dict_get(ctx, dict, PDF_NAME(Subtype));
1579 dfonts = pdf_dict_get(ctx, dict, PDF_NAME(DescendantFonts));
1580 charprocs = pdf_dict_get(ctx, dict, PDF_NAME(CharProcs));
1581
1582 if (pdf_name_eq(ctx, subtype, PDF_NAME(Type0)))
1583 fontdesc = pdf_load_type0_font(ctx, doc, dict);
1584 else if (pdf_name_eq(ctx, subtype, PDF_NAME(Type1)))
1585 fontdesc = pdf_load_simple_font(ctx, doc, dict);
1586 else if (pdf_name_eq(ctx, subtype, PDF_NAME(MMType1)))
1587 fontdesc = pdf_load_simple_font(ctx, doc, dict);
1588 else if (pdf_name_eq(ctx, subtype, PDF_NAME(TrueType)))
1589 fontdesc = pdf_load_simple_font(ctx, doc, dict);
1590 else if (pdf_name_eq(ctx, subtype, PDF_NAME(Type3)))
1591 {
1592 fontdesc = pdf_load_type3_font(ctx, doc, rdb, dict);
1593 type3 = 1;
1594 }
1595 else if (charprocs)
1596 {
1597 fz_warn(ctx, "unknown font format, guessing type3.");
1598 fontdesc = pdf_load_type3_font(ctx, doc, rdb, dict);
1599 type3 = 1;
1600 }
1601 else if (dfonts)
1602 {
1603 fz_warn(ctx, "unknown font format, guessing type0.");
1604 fontdesc = pdf_load_type0_font(ctx, doc, dict);
1605 }
1606 else
1607 {
1608 fz_warn(ctx, "unknown font format, guessing type1 or truetype.");
1609 fontdesc = pdf_load_simple_font(ctx, doc, dict);
1610 }
1611
1612 fz_try(ctx)
1613 {
1614 /* Create glyph width table for stretching substitute fonts and text extraction. */
1615 pdf_make_width_table(ctx, fontdesc);
1616
1617 pdf_store_item(ctx, dict, fontdesc, fontdesc->size);
1618
1619 /* Load CharProcs */
1620 if (type3)
1621 {
1622 fontdesc->t3loading = 1;
1623 fz_try(ctx)
1624 pdf_load_type3_glyphs(ctx, doc, fontdesc);
1625 fz_always(ctx)
1626 fontdesc->t3loading = 0;
1627 fz_catch(ctx)
1628 {
1629 pdf_remove_item(ctx, fontdesc->storable.drop, dict);
1630 fz_rethrow(ctx);
1631 }
1632 }
1633 }
1634 fz_catch(ctx)
1635 {
1636 pdf_drop_font(ctx, fontdesc);
1637 fz_rethrow(ctx);
1638 }
1639
1640 return fontdesc;
1641 }
1642
1643 void
1644 pdf_print_font(fz_context *ctx, fz_output *out, pdf_font_desc *fontdesc)
1645 {
1646 int i;
1647
1648 fz_write_printf(ctx, out, "fontdesc {\n");
1649
1650 if (fontdesc->font->ft_face)
1651 fz_write_printf(ctx, out, "\tfreetype font\n");
1652 if (fontdesc->font->t3procs)
1653 fz_write_printf(ctx, out, "\ttype3 font\n");
1654
1655 fz_write_printf(ctx, out, "\twmode %d\n", fontdesc->wmode);
1656 fz_write_printf(ctx, out, "\tDW %d\n", fontdesc->dhmtx.w);
1657
1658 fz_write_printf(ctx, out, "\tW {\n");
1659 for (i = 0; i < fontdesc->hmtx_len; i++)
1660 fz_write_printf(ctx, out, "\t\t<%04x> <%04x> %d\n",
1661 fontdesc->hmtx[i].lo, fontdesc->hmtx[i].hi, fontdesc->hmtx[i].w);
1662 fz_write_printf(ctx, out, "\t}\n");
1663
1664 if (fontdesc->wmode)
1665 {
1666 fz_write_printf(ctx, out, "\tDW2 [%d %d]\n", fontdesc->dvmtx.y, fontdesc->dvmtx.w);
1667 fz_write_printf(ctx, out, "\tW2 {\n");
1668 for (i = 0; i < fontdesc->vmtx_len; i++)
1669 fz_write_printf(ctx, out, "\t\t<%04x> <%04x> %d %d %d\n", fontdesc->vmtx[i].lo, fontdesc->vmtx[i].hi,
1670 fontdesc->vmtx[i].x, fontdesc->vmtx[i].y, fontdesc->vmtx[i].w);
1671 fz_write_printf(ctx, out, "\t}\n");
1672 }
1673 }