Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/source/pdf/pdf-font.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 // Copyright (C) 2004-2025 Artifex Software, Inc. | |
| 2 // | |
| 3 // This file is part of MuPDF. | |
| 4 // | |
| 5 // MuPDF is free software: you can redistribute it and/or modify it under the | |
| 6 // terms of the GNU Affero General Public License as published by the Free | |
| 7 // Software Foundation, either version 3 of the License, or (at your option) | |
| 8 // any later version. | |
| 9 // | |
| 10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY | |
| 11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
| 12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more | |
| 13 // details. | |
| 14 // | |
| 15 // You should have received a copy of the GNU Affero General Public License | |
| 16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> | |
| 17 // | |
| 18 // Alternative licensing terms are available from the licensor. | |
| 19 // For commercial licensing, see <https://www.artifex.com/> or contact | |
| 20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, | |
| 21 // CA 94129, USA, for further information. | |
| 22 | |
| 23 #include "mupdf/fitz.h" | |
| 24 #include "mupdf/pdf.h" | |
| 25 | |
| 26 #include <assert.h> | |
| 27 | |
| 28 #include <ft2build.h> | |
| 29 #include FT_FREETYPE_H | |
| 30 #include FT_ADVANCES_H | |
| 31 #ifdef FT_FONT_FORMATS_H | |
| 32 #include FT_FONT_FORMATS_H | |
| 33 #else | |
| 34 #include FT_XFREE86_H | |
| 35 #endif | |
| 36 #include FT_TRUETYPE_TABLES_H | |
| 37 | |
| 38 #ifndef FT_SFNT_HEAD | |
| 39 #define FT_SFNT_HEAD ft_sfnt_head | |
| 40 #endif | |
| 41 | |
| 42 void | |
| 43 pdf_load_encoding(const char **estrings, const char *encoding) | |
| 44 { | |
| 45 const char * const *bstrings = NULL; | |
| 46 int i; | |
| 47 | |
| 48 if (!strcmp(encoding, "StandardEncoding")) | |
| 49 bstrings = fz_glyph_name_from_adobe_standard; | |
| 50 if (!strcmp(encoding, "MacRomanEncoding")) | |
| 51 bstrings = fz_glyph_name_from_mac_roman; | |
| 52 if (!strcmp(encoding, "MacExpertEncoding")) | |
| 53 bstrings = fz_glyph_name_from_mac_expert; | |
| 54 if (!strcmp(encoding, "WinAnsiEncoding")) | |
| 55 bstrings = fz_glyph_name_from_win_ansi; | |
| 56 | |
| 57 if (bstrings) | |
| 58 for (i = 0; i < 256; i++) | |
| 59 estrings[i] = bstrings[i]; | |
| 60 } | |
| 61 | |
| 62 static void pdf_load_font_descriptor(fz_context *ctx, pdf_document *doc, pdf_font_desc *fontdesc, pdf_obj *dict, | |
| 63 const char *collection, const char *basefont, int iscidfont); | |
| 64 | |
/*
 * Alias table for the base font names.
 *
 * Each row is a NULL-terminated list of spellings; the first entry of a
 * row is the canonical name that pdf_clean_font_name returns when any
 * spelling in that row matches. Rows hold at most 9 names plus the
 * terminator. The table is never written to, so both the strings and
 * the pointers are const.
 */
static const char * const base_font_names[][10] =
{
	{ "Courier", "CourierNew", "CourierNewPSMT", NULL },
	{ "Courier-Bold", "CourierNew,Bold", "Courier,Bold",
		"CourierNewPS-BoldMT", "CourierNew-Bold", NULL },
	{ "Courier-Oblique", "CourierNew,Italic", "Courier,Italic",
		"CourierNewPS-ItalicMT", "CourierNew-Italic", NULL },
	{ "Courier-BoldOblique", "CourierNew,BoldItalic", "Courier,BoldItalic",
		"CourierNewPS-BoldItalicMT", "CourierNew-BoldItalic", NULL },
	{ "Helvetica", "ArialMT", "Arial", NULL },
	{ "Helvetica-Bold", "Arial-BoldMT", "Arial,Bold", "Arial-Bold",
		"Helvetica,Bold", NULL },
	{ "Helvetica-Oblique", "Arial-ItalicMT", "Arial,Italic", "Arial-Italic",
		"Helvetica,Italic", "Helvetica-Italic", NULL },
	{ "Helvetica-BoldOblique", "Arial-BoldItalicMT",
		"Arial,BoldItalic", "Arial-BoldItalic",
		"Helvetica,BoldItalic", "Helvetica-BoldItalic", NULL },
	{ "Times-Roman", "TimesNewRomanPSMT", "TimesNewRoman",
		"TimesNewRomanPS", NULL },
	{ "Times-Bold", "TimesNewRomanPS-BoldMT", "TimesNewRoman,Bold",
		"TimesNewRomanPS-Bold", "TimesNewRoman-Bold", NULL },
	{ "Times-Italic", "TimesNewRomanPS-ItalicMT", "TimesNewRoman,Italic",
		"TimesNewRomanPS-Italic", "TimesNewRoman-Italic", NULL },
	{ "Times-BoldItalic", "TimesNewRomanPS-BoldItalicMT",
		"TimesNewRoman,BoldItalic", "TimesNewRomanPS-BoldItalic",
		"TimesNewRoman-BoldItalic", NULL },
	{ "Symbol", "Symbol,Italic", "Symbol,Bold", "Symbol,BoldItalic",
		"SymbolMT", "SymbolMT,Italic", "SymbolMT,Bold", "SymbolMT,BoldItalic", NULL },
	{ "ZapfDingbats", NULL }
};
| 95 | |
| 96 const unsigned char * | |
| 97 pdf_lookup_substitute_font(fz_context *ctx, int mono, int serif, int bold, int italic, int *len) | |
| 98 { | |
| 99 if (mono) { | |
| 100 if (bold) { | |
| 101 if (italic) return fz_lookup_base14_font(ctx, "Courier-BoldOblique", len); | |
| 102 else return fz_lookup_base14_font(ctx, "Courier-Bold", len); | |
| 103 } else { | |
| 104 if (italic) return fz_lookup_base14_font(ctx, "Courier-Oblique", len); | |
| 105 else return fz_lookup_base14_font(ctx, "Courier", len); | |
| 106 } | |
| 107 } else if (serif) { | |
| 108 if (bold) { | |
| 109 if (italic) return fz_lookup_base14_font(ctx, "Times-BoldItalic", len); | |
| 110 else return fz_lookup_base14_font(ctx, "Times-Bold", len); | |
| 111 } else { | |
| 112 if (italic) return fz_lookup_base14_font(ctx, "Times-Italic", len); | |
| 113 else return fz_lookup_base14_font(ctx, "Times-Roman", len); | |
| 114 } | |
| 115 } else { | |
| 116 if (bold) { | |
| 117 if (italic) return fz_lookup_base14_font(ctx, "Helvetica-BoldOblique", len); | |
| 118 else return fz_lookup_base14_font(ctx, "Helvetica-Bold", len); | |
| 119 } else { | |
| 120 if (italic) return fz_lookup_base14_font(ctx, "Helvetica-Oblique", len); | |
| 121 else return fz_lookup_base14_font(ctx, "Helvetica", len); | |
| 122 } | |
| 123 } | |
| 124 } | |
| 125 | |
/*
 * Return 1 if the font name matches one of the DynaLab naming patterns,
 * 0 otherwise: it contains "HuaTian" or "MingLi", or it starts with
 * "DF" / "DLC", or it contains "+DF" / "+DLC".
 */
static int is_dynalab(char *name)
{
	return strstr(name, "HuaTian") != NULL
		|| strstr(name, "MingLi") != NULL
		|| strncmp(name, "DF", 2) == 0
		|| strstr(name, "+DF") != NULL
		|| strncmp(name, "DLC", 3) == 0
		|| strstr(name, "+DLC") != NULL;
}
| 138 | |
/*
 * Compare two strings while ignoring every ' ' character in both.
 *
 * Returns 0 when the strings are equal once spaces are removed and 1
 * otherwise. Unlike strcmp, the result carries no ordering information.
 */
static int strcmp_ignore_space(const char *a, const char *b)
{
	for (;;)
	{
		while (*a == ' ')
			a++;
		while (*b == ' ')
			b++;
		if (*a != *b)
			return 1;
		/* The characters are equal here, so both strings end together. */
		if (*a == 0)
			return 0;
		a++;
		b++;
	}
}
| 157 | |
| 158 const char *pdf_clean_font_name(const char *fontname) | |
| 159 { | |
| 160 int i, k; | |
| 161 for (i = 0; i < (int)nelem(base_font_names); i++) | |
| 162 for (k = 0; base_font_names[i][k]; k++) | |
| 163 if (!strcmp_ignore_space(base_font_names[i][k], fontname)) | |
| 164 return base_font_names[i][0]; | |
| 165 return fontname; | |
| 166 } | |
| 167 | |
| 168 /* | |
| 169 * FreeType and Rendering glue | |
| 170 */ | |
| 171 | |
| 172 enum { UNKNOWN, TYPE1, TRUETYPE }; | |
| 173 | |
| 174 static int ft_kind(fz_context *ctx, FT_Face face) | |
| 175 { | |
| 176 const char *kind; | |
| 177 fz_ft_lock(ctx); | |
| 178 #ifdef FT_FONT_FORMATS_H | |
| 179 kind = FT_Get_Font_Format(face); | |
| 180 #else | |
| 181 kind = FT_Get_X11_Font_Format(face); | |
| 182 #endif | |
| 183 fz_ft_unlock(ctx); | |
| 184 if (!strcmp(kind, "TrueType")) return TRUETYPE; | |
| 185 if (!strcmp(kind, "Type 1")) return TYPE1; | |
| 186 if (!strcmp(kind, "CFF")) return TYPE1; | |
| 187 if (!strcmp(kind, "CID Type 1")) return TYPE1; | |
| 188 return UNKNOWN; | |
| 189 } | |
| 190 | |
| 191 static int ft_cid_to_gid(pdf_font_desc *fontdesc, int cid) | |
| 192 { | |
| 193 if (fontdesc->to_ttf_cmap) | |
| 194 { | |
| 195 cid = pdf_lookup_cmap(fontdesc->to_ttf_cmap, cid); | |
| 196 | |
| 197 /* vertical presentation forms */ | |
| 198 if (fontdesc->font->flags.ft_substitute && fontdesc->wmode) | |
| 199 { | |
| 200 switch (cid) | |
| 201 { | |
| 202 case 0x0021: cid = 0xFE15; break; /* ! */ | |
| 203 case 0x0028: cid = 0xFE35; break; /* ( */ | |
| 204 case 0x0029: cid = 0xFE36; break; /* ) */ | |
| 205 case 0x002C: cid = 0xFE10; break; /* , */ | |
| 206 case 0x003A: cid = 0xFE13; break; /* : */ | |
| 207 case 0x003B: cid = 0xFE14; break; /* ; */ | |
| 208 case 0x003F: cid = 0xFE16; break; /* ? */ | |
| 209 case 0x005B: cid = 0xFE47; break; /* [ */ | |
| 210 case 0x005D: cid = 0xFE48; break; /* ] */ | |
| 211 case 0x005F: cid = 0xFE33; break; /* _ */ | |
| 212 case 0x007B: cid = 0xFE37; break; /* { */ | |
| 213 case 0x007D: cid = 0xFE38; break; /* } */ | |
| 214 case 0x2013: cid = 0xFE32; break; /* EN DASH */ | |
| 215 case 0x2014: cid = 0xFE31; break; /* EM DASH */ | |
| 216 case 0x2025: cid = 0xFE30; break; /* TWO DOT LEADER */ | |
| 217 case 0x2026: cid = 0xFE19; break; /* HORIZONTAL ELLIPSIS */ | |
| 218 case 0x3001: cid = 0xFE11; break; /* IDEOGRAPHIC COMMA */ | |
| 219 case 0x3002: cid = 0xFE12; break; /* IDEOGRAPHIC FULL STOP */ | |
| 220 case 0x3008: cid = 0xFE3F; break; /* OPENING ANGLE BRACKET */ | |
| 221 case 0x3009: cid = 0xFE40; break; /* CLOSING ANGLE BRACKET */ | |
| 222 case 0x300A: cid = 0xFE3D; break; /* LEFT DOUBLE ANGLE BRACKET */ | |
| 223 case 0x300B: cid = 0xFE3E; break; /* RIGHT DOUBLE ANGLE BRACKET */ | |
| 224 case 0x300C: cid = 0xFE41; break; /* LEFT CORNER BRACKET */ | |
| 225 case 0x300D: cid = 0xFE42; break; /* RIGHT CORNER BRACKET */ | |
| 226 case 0x300E: cid = 0xFE43; break; /* LEFT WHITE CORNER BRACKET */ | |
| 227 case 0x300F: cid = 0xFE44; break; /* RIGHT WHITE CORNER BRACKET */ | |
| 228 case 0x3010: cid = 0xFE3B; break; /* LEFT BLACK LENTICULAR BRACKET */ | |
| 229 case 0x3011: cid = 0xFE3C; break; /* RIGHT BLACK LENTICULAR BRACKET */ | |
| 230 case 0x3014: cid = 0xFE39; break; /* LEFT TORTOISE SHELL BRACKET */ | |
| 231 case 0x3015: cid = 0xFE3A; break; /* RIGHT TORTOISE SHELL BRACKET */ | |
| 232 case 0x3016: cid = 0xFE17; break; /* LEFT WHITE LENTICULAR BRACKET */ | |
| 233 case 0x3017: cid = 0xFE18; break; /* RIGHT WHITE LENTICULAR BRACKET */ | |
| 234 | |
| 235 case 0xFF01: cid = 0xFE15; break; /* FULLWIDTH EXCLAMATION MARK */ | |
| 236 case 0xFF08: cid = 0xFE35; break; /* FULLWIDTH LEFT PARENTHESIS */ | |
| 237 case 0xFF09: cid = 0xFE36; break; /* FULLWIDTH RIGHT PARENTHESIS */ | |
| 238 case 0xFF0C: cid = 0xFE10; break; /* FULLWIDTH COMMA */ | |
| 239 case 0xFF1A: cid = 0xFE13; break; /* FULLWIDTH COLON */ | |
| 240 case 0xFF1B: cid = 0xFE14; break; /* FULLWIDTH SEMICOLON */ | |
| 241 case 0xFF1F: cid = 0xFE16; break; /* FULLWIDTH QUESTION MARK */ | |
| 242 case 0xFF3B: cid = 0xFE47; break; /* FULLWIDTH LEFT SQUARE BRACKET */ | |
| 243 case 0xFF3D: cid = 0xFE48; break; /* FULLWIDTH RIGHT SQUARE BRACKET */ | |
| 244 case 0xFF3F: cid = 0xFE33; break; /* FULLWIDTH LOW LINE */ | |
| 245 case 0xFF5B: cid = 0xFE37; break; /* FULLWIDTH LEFT CURLY BRACKET */ | |
| 246 case 0xFF5D: cid = 0xFE38; break; /* FULLWIDTH RIGHT CURLY BRACKET */ | |
| 247 | |
| 248 case 0x30FC: cid = 0xFE31; break; /* KATAKANA-HIRAGANA PROLONGED SOUND MARK */ | |
| 249 case 0xFF0D: cid = 0xFE31; break; /* FULLWIDTH HYPHEN-MINUS */ | |
| 250 } | |
| 251 } | |
| 252 | |
| 253 return ft_char_index(fontdesc->font->ft_face, cid); | |
| 254 } | |
| 255 | |
| 256 if (fontdesc->cid_to_gid && (size_t)cid < fontdesc->cid_to_gid_len && cid >= 0) | |
| 257 return fontdesc->cid_to_gid[cid]; | |
| 258 | |
| 259 return cid; | |
| 260 } | |
| 261 | |
| 262 int | |
| 263 pdf_font_cid_to_gid(fz_context *ctx, pdf_font_desc *fontdesc, int cid) | |
| 264 { | |
| 265 if (fontdesc->font->ft_face) | |
| 266 { | |
| 267 int gid; | |
| 268 fz_ft_lock(ctx); | |
| 269 gid = ft_cid_to_gid(fontdesc, cid); | |
| 270 fz_ft_unlock(ctx); | |
| 271 return gid; | |
| 272 } | |
| 273 return cid; | |
| 274 } | |
| 275 | |
| 276 static int ft_width(fz_context *ctx, pdf_font_desc *fontdesc, int cid) | |
| 277 { | |
| 278 int mask = FT_LOAD_NO_SCALE | FT_LOAD_NO_HINTING | FT_LOAD_NO_BITMAP | FT_LOAD_IGNORE_TRANSFORM; | |
| 279 int gid = ft_cid_to_gid(fontdesc, cid); | |
| 280 FT_Fixed adv = 0; | |
| 281 int fterr; | |
| 282 FT_Face face = fontdesc->font->ft_face; | |
| 283 FT_UShort units_per_EM; | |
| 284 | |
| 285 fterr = FT_Get_Advance(face, gid, mask, &adv); | |
| 286 if (fterr && fterr != FT_Err_Invalid_Argument) | |
| 287 fz_warn(ctx, "FT_Get_Advance(%d): %s", gid, ft_error_string(fterr)); | |
| 288 | |
| 289 units_per_EM = face->units_per_EM; | |
| 290 if (units_per_EM == 0) | |
| 291 units_per_EM = 2048; | |
| 292 | |
| 293 return adv * 1000 / units_per_EM; | |
| 294 } | |
| 295 | |
/*
 * Glyph names with explicit character codes. lookup_mre_code consults
 * this table first, before falling back to the MacRoman glyph name
 * table. ("mre" presumably stands for MacRomanEncoding -- confirm.)
 */
static const struct { int code; const char *name; } mre_diff_table[] =
{
	{ 173, "notequal" },     { 176, "infinity" },
	{ 178, "lessequal" },    { 179, "greaterequal" },
	{ 182, "partialdiff" },  { 183, "summation" },
	{ 184, "product" },      { 185, "pi" },
	{ 186, "integral" },     { 189, "Omega" },
	{ 195, "radical" },      { 197, "approxequal" },
	{ 198, "Delta" },        { 215, "lozenge" },
	{ 219, "Euro" },         { 240, "apple" },
};
| 315 | |
| 316 static int lookup_mre_code(const char *name) | |
| 317 { | |
| 318 int i; | |
| 319 for (i = 0; i < (int)nelem(mre_diff_table); ++i) | |
| 320 if (!strcmp(name, mre_diff_table[i].name)) | |
| 321 return mre_diff_table[i].code; | |
| 322 for (i = 0; i < 256; i++) | |
| 323 if (fz_glyph_name_from_mac_roman[i] && !strcmp(name, fz_glyph_name_from_mac_roman[i])) | |
| 324 return i; | |
| 325 return -1; | |
| 326 } | |
| 327 | |
| 328 static int ft_find_glyph_by_unicode_name(FT_Face face, const char *name) | |
| 329 { | |
| 330 int unicode, glyph; | |
| 331 | |
| 332 /* Prefer exact unicode match if available. */ | |
| 333 unicode = fz_unicode_from_glyph_name_strict(name); | |
| 334 if (unicode > 0) | |
| 335 { | |
| 336 glyph = ft_char_index(face, unicode); | |
| 337 if (glyph > 0) | |
| 338 return glyph; | |
| 339 } | |
| 340 | |
| 341 /* Fall back to font glyph name if we can. */ | |
| 342 glyph = ft_name_index(face, name); | |
| 343 if (glyph > 0) | |
| 344 return glyph; | |
| 345 | |
| 346 /* Fuzzy unicode match as last attempt. */ | |
| 347 unicode = fz_unicode_from_glyph_name(name); | |
| 348 if (unicode > 0) | |
| 349 return ft_char_index(face, unicode); | |
| 350 | |
| 351 /* Failed. */ | |
| 352 return 0; | |
| 353 } | |
| 354 | |
| 355 /* | |
| 356 * Load font files. | |
| 357 */ | |
| 358 | |
| 359 static void | |
| 360 pdf_load_builtin_font(fz_context *ctx, pdf_font_desc *fontdesc, const char *fontname, int has_descriptor) | |
| 361 { | |
| 362 FT_Face face; | |
| 363 const char *clean_name = pdf_clean_font_name(fontname); | |
| 364 if (clean_name == fontname) | |
| 365 clean_name = "Times-Roman"; | |
| 366 | |
| 367 fontdesc->font = fz_load_system_font(ctx, fontname, 0, 0, !has_descriptor); | |
| 368 if (!fontdesc->font) | |
| 369 { | |
| 370 const unsigned char *data; | |
| 371 int len; | |
| 372 | |
| 373 data = fz_lookup_base14_font(ctx, clean_name, &len); | |
| 374 if (!data) | |
| 375 fz_throw(ctx, FZ_ERROR_SYNTAX, "cannot find builtin font: '%s'", fontname); | |
| 376 | |
| 377 fontdesc->font = fz_new_font_from_memory(ctx, fontname, data, len, 0, 1); | |
| 378 fontdesc->font->flags.is_serif = !!strstr(clean_name, "Times"); | |
| 379 } | |
| 380 | |
| 381 if (!strcmp(clean_name, "Symbol") || !strcmp(clean_name, "ZapfDingbats")) | |
| 382 fontdesc->flags |= PDF_FD_SYMBOLIC; | |
| 383 | |
| 384 face = fontdesc->font->ft_face; | |
| 385 fontdesc->ascent = 1000.0f * face->ascender / face->units_per_EM; | |
| 386 fontdesc->descent = 1000.0f * face->descender / face->units_per_EM; | |
| 387 } | |
| 388 | |
| 389 static void | |
| 390 pdf_load_substitute_font(fz_context *ctx, pdf_font_desc *fontdesc, const char *fontname, int mono, int serif, int bold, int italic) | |
| 391 { | |
| 392 fontdesc->font = fz_load_system_font(ctx, fontname, bold, italic, 0); | |
| 393 if (!fontdesc->font) | |
| 394 { | |
| 395 const unsigned char *data; | |
| 396 int len; | |
| 397 | |
| 398 data = pdf_lookup_substitute_font(ctx, mono, serif, bold, italic, &len); | |
| 399 if (!data) | |
| 400 fz_throw(ctx, FZ_ERROR_SYNTAX, "cannot find substitute font"); | |
| 401 | |
| 402 fontdesc->font = fz_new_font_from_memory(ctx, fontname, data, len, 0, 1); | |
| 403 fontdesc->font->flags.fake_bold = bold && !fontdesc->font->flags.is_bold; | |
| 404 fontdesc->font->flags.fake_italic = italic && !fontdesc->font->flags.is_italic; | |
| 405 | |
| 406 fontdesc->font->flags.is_mono = mono; | |
| 407 fontdesc->font->flags.is_serif = serif; | |
| 408 fontdesc->font->flags.is_bold = bold; | |
| 409 fontdesc->font->flags.is_italic = italic; | |
| 410 } | |
| 411 | |
| 412 fontdesc->font->flags.ft_substitute = 1; | |
| 413 fontdesc->font->flags.ft_stretch = 1; | |
| 414 } | |
| 415 | |
| 416 static void | |
| 417 pdf_load_substitute_cjk_font(fz_context *ctx, pdf_font_desc *fontdesc, const char *fontname, int ros, int serif) | |
| 418 { | |
| 419 fontdesc->font = fz_load_system_cjk_font(ctx, fontname, ros, serif); | |
| 420 if (!fontdesc->font) | |
| 421 { | |
| 422 const unsigned char *data; | |
| 423 int size; | |
| 424 int subfont; | |
| 425 | |
| 426 data = fz_lookup_cjk_font(ctx, ros, &size, &subfont); | |
| 427 if (!data) | |
| 428 fz_throw(ctx, FZ_ERROR_SYNTAX, "cannot find builtin CJK font"); | |
| 429 | |
| 430 /* A glyph bbox cache is too big for CJK fonts. */ | |
| 431 fontdesc->font = fz_new_font_from_memory(ctx, fontname, data, size, subfont, 0); | |
| 432 } | |
| 433 | |
| 434 fontdesc->font->flags.ft_substitute = 1; | |
| 435 fontdesc->font->flags.ft_stretch = 0; | |
| 436 fontdesc->font->flags.cjk = 1; | |
| 437 fontdesc->font->flags.cjk_lang = ros; | |
| 438 } | |
| 439 | |
| 440 static struct { int ros, serif; const char *name; } known_cjk_fonts[] = { | |
| 441 { FZ_ADOBE_GB, 1, "SimFang" }, | |
| 442 { FZ_ADOBE_GB, 0, "SimHei" }, | |
| 443 { FZ_ADOBE_GB, 1, "SimKai" }, | |
| 444 { FZ_ADOBE_GB, 1, "SimLi" }, | |
| 445 { FZ_ADOBE_GB, 1, "SimSun" }, | |
| 446 { FZ_ADOBE_GB, 1, "Song" }, | |
| 447 | |
| 448 { FZ_ADOBE_CNS, 1, "MingLiU" }, | |
| 449 | |
| 450 { FZ_ADOBE_JAPAN, 0, "Gothic" }, | |
| 451 { FZ_ADOBE_JAPAN, 1, "Mincho" }, | |
| 452 | |
| 453 { FZ_ADOBE_KOREA, 1, "Batang" }, | |
| 454 { FZ_ADOBE_KOREA, 0, "Gulim" }, | |
| 455 { FZ_ADOBE_KOREA, 0, "Dotum" }, | |
| 456 }; | |
| 457 | |
/* Return 1 if ref occurs anywhere inside s, 0 otherwise. */
static int match_font_name(const char *s, const char *ref)
{
	return strstr(s, ref) != NULL;
}
| 462 | |
| 463 static void | |
| 464 pdf_load_system_font(fz_context *ctx, pdf_font_desc *fontdesc, const char *fontname, const char *collection) | |
| 465 { | |
| 466 int bold = 0; | |
| 467 int italic = 0; | |
| 468 int serif = 0; | |
| 469 int mono = 0; | |
| 470 | |
| 471 if (strstr(fontname, "Bold")) | |
| 472 bold = 1; | |
| 473 if (strstr(fontname, "Italic")) | |
| 474 italic = 1; | |
| 475 if (strstr(fontname, "Oblique")) | |
| 476 italic = 1; | |
| 477 | |
| 478 if (fontdesc->flags & PDF_FD_FIXED_PITCH) | |
| 479 mono = 1; | |
| 480 if (fontdesc->flags & PDF_FD_SERIF) | |
| 481 serif = 1; | |
| 482 if (fontdesc->flags & PDF_FD_ITALIC) | |
| 483 italic = 1; | |
| 484 if (fontdesc->flags & PDF_FD_FORCE_BOLD) | |
| 485 bold = 1; | |
| 486 | |
| 487 if (collection) | |
| 488 { | |
| 489 if (!strcmp(collection, "Adobe-CNS1")) | |
| 490 pdf_load_substitute_cjk_font(ctx, fontdesc, fontname, FZ_ADOBE_CNS, serif); | |
| 491 else if (!strcmp(collection, "Adobe-GB1")) | |
| 492 pdf_load_substitute_cjk_font(ctx, fontdesc, fontname, FZ_ADOBE_GB, serif); | |
| 493 else if (!strcmp(collection, "Adobe-Japan1")) | |
| 494 pdf_load_substitute_cjk_font(ctx, fontdesc, fontname, FZ_ADOBE_JAPAN, serif); | |
| 495 else if (!strcmp(collection, "Adobe-Korea1")) | |
| 496 pdf_load_substitute_cjk_font(ctx, fontdesc, fontname, FZ_ADOBE_KOREA, serif); | |
| 497 else | |
| 498 { | |
| 499 size_t i; | |
| 500 if (strcmp(collection, "Adobe-Identity") != 0) | |
| 501 fz_warn(ctx, "unknown cid collection: %s", collection); | |
| 502 | |
| 503 // Recognize common CJK fonts when using Identity or other non-CJK CMap | |
| 504 for (i = 0; i < nelem(known_cjk_fonts); ++i) | |
| 505 { | |
| 506 if (match_font_name(fontname, known_cjk_fonts[i].name)) | |
| 507 { | |
| 508 pdf_load_substitute_cjk_font(ctx, fontdesc, fontname, | |
| 509 known_cjk_fonts[i].ros, known_cjk_fonts[i].serif); | |
| 510 return; | |
| 511 } | |
| 512 } | |
| 513 | |
| 514 pdf_load_substitute_font(ctx, fontdesc, fontname, mono, serif, bold, italic); | |
| 515 } | |
| 516 } | |
| 517 else | |
| 518 { | |
| 519 pdf_load_substitute_font(ctx, fontdesc, fontname, mono, serif, bold, italic); | |
| 520 } | |
| 521 } | |
| 522 | |
/*
 * Read big-endian 16/32-bit unsigned values from a byte pointer.
 *
 * Each byte is widened to the unsigned result type before it is shifted,
 * so a leading byte >= 0x80 is never shifted into the sign bit of a
 * promoted int. The argument is evaluated more than once; do not pass an
 * expression with side effects.
 */
#define TTF_U16(p) ((uint16_t)(((uint16_t)(p)[0] << 8) | (uint16_t)(p)[1]))
#define TTF_U32(p) (((uint32_t)(p)[0] << 24) | ((uint32_t)(p)[1] << 16) | ((uint32_t)(p)[2] << 8) | (uint32_t)(p)[3])
| 525 | |
| 526 static fz_buffer * | |
| 527 pdf_extract_cff_subtable(fz_context *ctx, unsigned char *data, size_t size) | |
| 528 { | |
| 529 size_t num_tables = TTF_U16(data + 4); | |
| 530 size_t i; | |
| 531 | |
| 532 if (12 + num_tables * 16 > size) | |
| 533 fz_throw(ctx, FZ_ERROR_SYNTAX, "invalid TTF header"); | |
| 534 | |
| 535 for (i = 0; i < num_tables; ++i) | |
| 536 { | |
| 537 unsigned char *record = data + 12 + i * 16; | |
| 538 if (!memcmp("CFF ", record, 4)) | |
| 539 { | |
| 540 uint64_t offset = TTF_U32(record + 8); | |
| 541 uint64_t length = TTF_U32(record + 12); | |
| 542 uint64_t end = offset + length; | |
| 543 if (end > size) | |
| 544 fz_throw(ctx, FZ_ERROR_SYNTAX, "invalid TTF subtable offset/length"); | |
| 545 return fz_new_buffer_from_copied_data(ctx, data + offset, length); | |
| 546 } | |
| 547 } | |
| 548 | |
| 549 return NULL; | |
| 550 } | |
| 551 | |
| 552 static void | |
| 553 pdf_load_embedded_font(fz_context *ctx, pdf_document *doc, pdf_font_desc *fontdesc, const char *fontname, pdf_obj *stmref) | |
| 554 { | |
| 555 fz_buffer *buf; | |
| 556 unsigned char *data; | |
| 557 size_t size; | |
| 558 | |
| 559 fz_var(buf); | |
| 560 | |
| 561 buf = pdf_load_stream(ctx, stmref); | |
| 562 | |
| 563 fz_try(ctx) | |
| 564 { | |
| 565 /* Extract CFF subtable for OpenType fonts: */ | |
| 566 size = fz_buffer_storage(ctx, buf, &data); | |
| 567 if (size > 12) { | |
| 568 if (!memcmp("OTTO", data, 4)) { | |
| 569 fz_buffer *cff = pdf_extract_cff_subtable(ctx, data, size); | |
| 570 if (cff) | |
| 571 { | |
| 572 fz_drop_buffer(ctx, buf); | |
| 573 buf = cff; | |
| 574 } | |
| 575 } | |
| 576 } | |
| 577 | |
| 578 fontdesc->font = fz_new_font_from_buffer(ctx, fontname, buf, 0, 1); | |
| 579 } | |
| 580 fz_always(ctx) | |
| 581 fz_drop_buffer(ctx, buf); | |
| 582 fz_catch(ctx) | |
| 583 fz_rethrow(ctx); | |
| 584 | |
| 585 fontdesc->size += fz_buffer_storage(ctx, buf, NULL); | |
| 586 fontdesc->is_embedded = 1; | |
| 587 } | |
| 588 | |
| 589 /* | |
| 590 * Create and destroy | |
| 591 */ | |
| 592 | |
| 593 pdf_font_desc * | |
| 594 pdf_keep_font(fz_context *ctx, pdf_font_desc *fontdesc) | |
| 595 { | |
| 596 return fz_keep_storable(ctx, &fontdesc->storable); | |
| 597 } | |
| 598 | |
| 599 void | |
| 600 pdf_drop_font(fz_context *ctx, pdf_font_desc *fontdesc) | |
| 601 { | |
| 602 fz_drop_storable(ctx, &fontdesc->storable); | |
| 603 } | |
| 604 | |
| 605 static int | |
| 606 pdf_font_is_droppable(fz_context *ctx, fz_storable *fontdesc) | |
| 607 { | |
| 608 /* If we aren't holding the FT lock, then we can drop. */ | |
| 609 return !fz_ft_lock_held(ctx); | |
| 610 } | |
| 611 | |
| 612 static void | |
| 613 pdf_drop_font_imp(fz_context *ctx, fz_storable *fontdesc_) | |
| 614 { | |
| 615 pdf_font_desc *fontdesc = (pdf_font_desc *)fontdesc_; | |
| 616 | |
| 617 fz_drop_font(ctx, fontdesc->font); | |
| 618 pdf_drop_cmap(ctx, fontdesc->encoding); | |
| 619 pdf_drop_cmap(ctx, fontdesc->to_ttf_cmap); | |
| 620 pdf_drop_cmap(ctx, fontdesc->to_unicode); | |
| 621 fz_free(ctx, fontdesc->cid_to_gid); | |
| 622 fz_free(ctx, fontdesc->cid_to_ucs); | |
| 623 fz_free(ctx, fontdesc->hmtx); | |
| 624 fz_free(ctx, fontdesc->vmtx); | |
| 625 fz_free(ctx, fontdesc); | |
| 626 } | |
| 627 | |
| 628 pdf_font_desc * | |
| 629 pdf_new_font_desc(fz_context *ctx) | |
| 630 { | |
| 631 pdf_font_desc *fontdesc; | |
| 632 | |
| 633 fontdesc = fz_malloc_struct(ctx, pdf_font_desc); | |
| 634 FZ_INIT_AWKWARD_STORABLE(fontdesc, 1, pdf_drop_font_imp, pdf_font_is_droppable); | |
| 635 fontdesc->size = sizeof(pdf_font_desc); | |
| 636 | |
| 637 fontdesc->font = NULL; | |
| 638 | |
| 639 fontdesc->flags = 0; | |
| 640 fontdesc->italic_angle = 0; | |
| 641 fontdesc->ascent = 800; | |
| 642 fontdesc->descent = -200; | |
| 643 fontdesc->cap_height = 800; | |
| 644 fontdesc->x_height = 500; | |
| 645 fontdesc->missing_width = 0; | |
| 646 | |
| 647 fontdesc->encoding = NULL; | |
| 648 fontdesc->to_ttf_cmap = NULL; | |
| 649 fontdesc->cid_to_gid_len = 0; | |
| 650 fontdesc->cid_to_gid = NULL; | |
| 651 | |
| 652 fontdesc->to_unicode = NULL; | |
| 653 fontdesc->cid_to_ucs_len = 0; | |
| 654 fontdesc->cid_to_ucs = NULL; | |
| 655 | |
| 656 fontdesc->wmode = 0; | |
| 657 | |
| 658 fontdesc->hmtx_cap = 0; | |
| 659 fontdesc->vmtx_cap = 0; | |
| 660 fontdesc->hmtx_len = 0; | |
| 661 fontdesc->vmtx_len = 0; | |
| 662 fontdesc->hmtx = NULL; | |
| 663 fontdesc->vmtx = NULL; | |
| 664 | |
| 665 fontdesc->dhmtx.lo = 0x0000; | |
| 666 fontdesc->dhmtx.hi = 0xFFFF; | |
| 667 fontdesc->dhmtx.w = 1000; | |
| 668 | |
| 669 fontdesc->dvmtx.lo = 0x0000; | |
| 670 fontdesc->dvmtx.hi = 0xFFFF; | |
| 671 fontdesc->dvmtx.x = 0; | |
| 672 fontdesc->dvmtx.y = 880; | |
| 673 fontdesc->dvmtx.w = -1000; | |
| 674 | |
| 675 fontdesc->is_embedded = 0; | |
| 676 | |
| 677 return fontdesc; | |
| 678 } | |
| 679 | |
| 680 /* | |
| 681 * Simple fonts (Type1 and TrueType) | |
| 682 */ | |
| 683 | |
| 684 static FT_CharMap | |
| 685 select_type1_cmap(FT_Face face) | |
| 686 { | |
| 687 int i; | |
| 688 for (i = 0; i < face->num_charmaps; i++) | |
| 689 if (face->charmaps[i]->platform_id == 7) | |
| 690 return face->charmaps[i]; | |
| 691 if (face->num_charmaps > 0) | |
| 692 return face->charmaps[0]; | |
| 693 return NULL; | |
| 694 } | |
| 695 | |
| 696 static FT_CharMap | |
| 697 select_truetype_cmap(fz_context *ctx, FT_Face face, int symbolic) | |
| 698 { | |
| 699 int i; | |
| 700 | |
| 701 /* First look for a Microsoft symbolic cmap, if applicable */ | |
| 702 if (symbolic) | |
| 703 { | |
| 704 for (i = 0; i < face->num_charmaps; i++) | |
| 705 if (face->charmaps[i]->platform_id == 3 && face->charmaps[i]->encoding_id == 0) | |
| 706 return face->charmaps[i]; | |
| 707 } | |
| 708 | |
| 709 fz_ft_lock(ctx); | |
| 710 | |
| 711 /* Then look for a Microsoft Unicode cmap */ | |
| 712 for (i = 0; i < face->num_charmaps; i++) | |
| 713 if (face->charmaps[i]->platform_id == 3 && face->charmaps[i]->encoding_id == 1) | |
| 714 if (FT_Get_CMap_Format(face->charmaps[i]) != -1) | |
| 715 { | |
| 716 fz_ft_unlock(ctx); | |
| 717 return face->charmaps[i]; | |
| 718 } | |
| 719 | |
| 720 /* Finally look for an Apple MacRoman cmap */ | |
| 721 for (i = 0; i < face->num_charmaps; i++) | |
| 722 if (face->charmaps[i]->platform_id == 1 && face->charmaps[i]->encoding_id == 0) | |
| 723 if (FT_Get_CMap_Format(face->charmaps[i]) != -1) | |
| 724 { | |
| 725 fz_ft_unlock(ctx); | |
| 726 return face->charmaps[i]; | |
| 727 } | |
| 728 | |
| 729 if (face->num_charmaps > 0) | |
| 730 if (FT_Get_CMap_Format(face->charmaps[0]) != -1) | |
| 731 { | |
| 732 fz_ft_unlock(ctx); | |
| 733 return face->charmaps[0]; | |
| 734 } | |
| 735 | |
| 736 fz_ft_unlock(ctx); | |
| 737 return NULL; | |
| 738 } | |
| 739 | |
| 740 static FT_CharMap | |
| 741 select_unknown_cmap(FT_Face face) | |
| 742 { | |
| 743 if (face->num_charmaps > 0) | |
| 744 return face->charmaps[0]; | |
| 745 return NULL; | |
| 746 } | |
| 747 | |
| 748 static int use_s22pdf_workaround(fz_context *ctx, pdf_obj *dict, pdf_obj *descriptor) | |
| 749 { | |
| 750 if (descriptor) | |
| 751 { | |
| 752 if (pdf_dict_get(ctx, dict, PDF_NAME(Encoding)) != PDF_NAME(WinAnsiEncoding)) | |
| 753 return 0; | |
| 754 if (pdf_dict_get_int(ctx, descriptor, PDF_NAME(Flags)) != 4) | |
| 755 return 0; | |
| 756 return 1; | |
| 757 } | |
| 758 return 0; | |
| 759 } | |
| 760 | |
| 761 static pdf_font_desc * | |
| 762 pdf_load_simple_font(fz_context *ctx, pdf_document *doc, pdf_obj *dict) | |
| 763 { | |
| 764 const char *basefont; | |
| 765 pdf_obj *descriptor; | |
| 766 pdf_obj *encoding; | |
| 767 pdf_obj *widths; | |
| 768 unsigned short *etable = NULL; | |
| 769 pdf_font_desc *fontdesc = NULL; | |
| 770 pdf_obj *subtype; | |
| 771 FT_Face face; | |
| 772 FT_CharMap cmap; | |
| 773 int symbolic; | |
| 774 int kind; | |
| 775 int glyph; | |
| 776 | |
| 777 const char *estrings[256]; | |
| 778 char ebuffer[256][32]; | |
| 779 int i, k, n; | |
| 780 int fterr; | |
| 781 int has_lock = 0; | |
| 782 | |
| 783 fz_var(fontdesc); | |
| 784 fz_var(etable); | |
| 785 fz_var(has_lock); | |
| 786 | |
| 787 /* Load font file */ | |
| 788 fz_try(ctx) | |
| 789 { | |
| 790 fontdesc = pdf_new_font_desc(ctx); | |
| 791 | |
| 792 basefont = pdf_dict_get_name(ctx, dict, PDF_NAME(BaseFont)); | |
| 793 | |
| 794 descriptor = pdf_dict_get(ctx, dict, PDF_NAME(FontDescriptor)); | |
| 795 if (descriptor) | |
| 796 pdf_load_font_descriptor(ctx, doc, fontdesc, descriptor, NULL, basefont, 0); | |
| 797 else | |
| 798 pdf_load_builtin_font(ctx, fontdesc, basefont, 0); | |
| 799 | |
| 800 /* Some chinese documents mistakenly consider WinAnsiEncoding to be codepage 936 */ | |
| 801 if (use_s22pdf_workaround(ctx, dict, descriptor)) | |
| 802 { | |
| 803 char *cp936fonts[] = { | |
| 804 "\xCB\xCE\xCC\xE5", "SimSun,Regular", | |
| 805 "\xBA\xDA\xCC\xE5", "SimHei,Regular", | |
| 806 "\xBF\xAC\xCC\xE5_GB2312", "SimKai,Regular", | |
| 807 "\xB7\xC2\xCB\xCE_GB2312", "SimFang,Regular", | |
| 808 "\xC1\xA5\xCA\xE9", "SimLi,Regular", | |
| 809 NULL | |
| 810 }; | |
| 811 for (i = 0; cp936fonts[i]; i += 2) | |
| 812 if (!strcmp(basefont, cp936fonts[i])) | |
| 813 break; | |
| 814 if (cp936fonts[i]) | |
| 815 { | |
| 816 fz_warn(ctx, "workaround for S22PDF lying about chinese font encodings"); | |
| 817 pdf_drop_font(ctx, fontdesc); | |
| 818 fontdesc = NULL; | |
| 819 fontdesc = pdf_new_font_desc(ctx); | |
| 820 pdf_load_font_descriptor(ctx, doc, fontdesc, descriptor, "Adobe-GB1", cp936fonts[i+1], 0); | |
| 821 fontdesc->encoding = pdf_load_system_cmap(ctx, "GBK-EUC-H"); | |
| 822 fontdesc->to_unicode = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2"); | |
| 823 fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2"); | |
| 824 | |
| 825 goto skip_encoding; | |
| 826 } | |
| 827 } | |
| 828 | |
| 829 face = fontdesc->font->ft_face; | |
| 830 kind = ft_kind(ctx, face); | |
| 831 | |
| 832 /* Encoding */ | |
| 833 | |
| 834 symbolic = fontdesc->flags & 4; | |
| 835 /* Bug 703273: If non-symbolic, we're not symbolic. */ | |
| 836 if (fontdesc->flags & 32) | |
| 837 symbolic = 0; | |
| 838 | |
| 839 if (kind == TYPE1) | |
| 840 cmap = select_type1_cmap(face); | |
| 841 else if (kind == TRUETYPE) | |
| 842 cmap = select_truetype_cmap(ctx, face, symbolic); | |
| 843 else | |
| 844 cmap = select_unknown_cmap(face); | |
| 845 | |
| 846 if (cmap) | |
| 847 { | |
| 848 fz_ft_lock(ctx); | |
| 849 fterr = FT_Set_Charmap(face, cmap); | |
| 850 fz_ft_unlock(ctx); | |
| 851 if (fterr) | |
| 852 fz_warn(ctx, "freetype could not set cmap: %s", ft_error_string(fterr)); | |
| 853 } | |
| 854 else | |
| 855 fz_warn(ctx, "freetype could not find any cmaps"); | |
| 856 | |
| 857 /* FIXME: etable may leak on error. */ | |
| 858 etable = Memento_label(fz_malloc_array(ctx, 256, unsigned short), "cid_to_gid"); | |
| 859 fontdesc->size += 256 * sizeof(unsigned short); | |
| 860 for (i = 0; i < 256; i++) | |
| 861 { | |
| 862 estrings[i] = NULL; | |
| 863 etable[i] = 0; | |
| 864 } | |
| 865 | |
| 866 encoding = pdf_dict_get(ctx, dict, PDF_NAME(Encoding)); | |
| 867 if (encoding) | |
| 868 { | |
| 869 if (pdf_is_name(ctx, encoding)) | |
| 870 pdf_load_encoding(estrings, pdf_to_name(ctx, encoding)); | |
| 871 | |
| 872 if (pdf_is_dict(ctx, encoding)) | |
| 873 { | |
| 874 pdf_obj *base, *diff, *item; | |
| 875 | |
| 876 base = pdf_dict_get(ctx, encoding, PDF_NAME(BaseEncoding)); | |
| 877 if (pdf_is_name(ctx, base)) | |
| 878 pdf_load_encoding(estrings, pdf_to_name(ctx, base)); | |
| 879 else if (!fontdesc->is_embedded && !symbolic) | |
| 880 pdf_load_encoding(estrings, "StandardEncoding"); | |
| 881 | |
| 882 diff = pdf_dict_get(ctx, encoding, PDF_NAME(Differences)); | |
| 883 if (pdf_is_array(ctx, diff)) | |
| 884 { | |
| 885 n = pdf_array_len(ctx, diff); | |
| 886 k = 0; | |
| 887 for (i = 0; i < n; i++) | |
| 888 { | |
| 889 item = pdf_array_get(ctx, diff, i); | |
| 890 if (pdf_is_int(ctx, item)) | |
| 891 k = pdf_to_int(ctx, item); | |
| 892 if (pdf_is_name(ctx, item) && k >= 0 && k < (int)nelem(estrings)) | |
| 893 estrings[k++] = pdf_to_name(ctx, item); | |
| 894 } | |
| 895 } | |
| 896 } | |
| 897 } | |
| 898 else if (!fontdesc->is_embedded && !symbolic) | |
| 899 pdf_load_encoding(estrings, "StandardEncoding"); | |
| 900 | |
| 901 fz_ft_lock(ctx); | |
| 902 has_lock = 1; | |
| 903 | |
| 904 /* start with the builtin encoding */ | |
| 905 for (i = 0; i < 256; i++) | |
| 906 etable[i] = ft_char_index(face, i); | |
| 907 | |
| 908 /* built-in and substitute fonts may be a different type than what the document expects */ | |
| 909 subtype = pdf_dict_get(ctx, dict, PDF_NAME(Subtype)); | |
| 910 if (pdf_name_eq(ctx, subtype, PDF_NAME(Type1))) | |
| 911 kind = TYPE1; | |
| 912 else if (pdf_name_eq(ctx, subtype, PDF_NAME(MMType1))) | |
| 913 kind = TYPE1; | |
| 914 else if (pdf_name_eq(ctx, subtype, PDF_NAME(TrueType))) | |
| 915 kind = TRUETYPE; | |
| 916 else if (pdf_name_eq(ctx, subtype, PDF_NAME(CIDFontType0))) | |
| 917 kind = TYPE1; | |
| 918 else if (pdf_name_eq(ctx, subtype, PDF_NAME(CIDFontType2))) | |
| 919 kind = TRUETYPE; | |
| 920 | |
| 921 /* encode by glyph name where we can */ | |
| 922 if (kind == TYPE1) | |
| 923 { | |
| 924 for (i = 0; i < 256; i++) | |
| 925 { | |
| 926 if (estrings[i]) | |
| 927 { | |
| 928 glyph = ft_name_index(face, estrings[i]); | |
| 929 if (glyph > 0) | |
| 930 etable[i] = glyph; | |
| 931 } | |
| 932 } | |
| 933 } | |
| 934 | |
| 935 /* encode by glyph name where we can */ | |
| 936 if (kind == TRUETYPE) | |
| 937 { | |
| 938 /* Unicode cmap */ | |
| 939 if (!symbolic && face->charmap && face->charmap->platform_id == 3) | |
| 940 { | |
| 941 for (i = 0; i < 256; i++) | |
| 942 { | |
| 943 if (estrings[i]) | |
| 944 { | |
| 945 glyph = ft_find_glyph_by_unicode_name(face, estrings[i]); | |
| 946 if (glyph > 0) | |
| 947 etable[i] = glyph; | |
| 948 } | |
| 949 } | |
| 950 } | |
| 951 | |
| 952 /* MacRoman cmap */ | |
| 953 else if (!symbolic && face->charmap && face->charmap->platform_id == 1) | |
| 954 { | |
| 955 for (i = 0; i < 256; i++) | |
| 956 { | |
| 957 if (estrings[i]) | |
| 958 { | |
| 959 int mrcode = lookup_mre_code(estrings[i]); | |
| 960 glyph = 0; | |
| 961 if (mrcode > 0) | |
| 962 glyph = ft_char_index(face, mrcode); | |
| 963 if (glyph == 0) | |
| 964 glyph = ft_name_index(face, estrings[i]); | |
| 965 if (glyph > 0) | |
| 966 etable[i] = glyph; | |
| 967 } | |
| 968 } | |
| 969 } | |
| 970 | |
| 971 /* Symbolic cmap */ | |
| 972 else if (!face->charmap || face->charmap->encoding != FT_ENCODING_MS_SYMBOL) | |
| 973 { | |
| 974 for (i = 0; i < 256; i++) | |
| 975 { | |
| 976 if (estrings[i]) | |
| 977 { | |
| 978 glyph = ft_name_index(face, estrings[i]); | |
| 979 if (glyph > 0) | |
| 980 etable[i] = glyph; | |
| 981 } | |
| 982 } | |
| 983 } | |
| 984 } | |
| 985 | |
| 986 /* try to reverse the glyph names from the builtin encoding */ | |
| 987 for (i = 0; i < 256; i++) | |
| 988 { | |
| 989 if (etable[i] && !estrings[i]) | |
| 990 { | |
| 991 if (FT_HAS_GLYPH_NAMES(face)) | |
| 992 { | |
| 993 fterr = FT_Get_Glyph_Name(face, etable[i], ebuffer[i], 32); | |
| 994 if (fterr) | |
| 995 fz_warn(ctx, "freetype get glyph name (gid %d): %s", etable[i], ft_error_string(fterr)); | |
| 996 if (ebuffer[i][0]) | |
| 997 estrings[i] = ebuffer[i]; | |
| 998 } | |
| 999 else | |
| 1000 { | |
| 1001 estrings[i] = (char*) fz_glyph_name_from_win_ansi[i]; /* discard const */ | |
| 1002 } | |
| 1003 } | |
| 1004 } | |
| 1005 | |
| 1006 /* symbolic Type 1 fonts with an implicit encoding and non-standard glyph names */ | |
| 1007 if (kind == TYPE1 && symbolic) | |
| 1008 { | |
| 1009 for (i = 0; i < 256; i++) | |
| 1010 if (etable[i] && estrings[i] && !fz_unicode_from_glyph_name(estrings[i])) | |
| 1011 estrings[i] = (char*) fz_glyph_name_from_adobe_standard[i]; | |
| 1012 } | |
| 1013 | |
| 1014 fz_ft_unlock(ctx); | |
| 1015 has_lock = 0; | |
| 1016 | |
| 1017 fontdesc->encoding = pdf_new_identity_cmap(ctx, 0, 1); | |
| 1018 fontdesc->size += pdf_cmap_size(ctx, fontdesc->encoding); | |
| 1019 fontdesc->cid_to_gid_len = 256; | |
| 1020 fontdesc->cid_to_gid = etable; | |
| 1021 | |
| 1022 fz_try(ctx) | |
| 1023 { | |
| 1024 pdf_load_to_unicode(ctx, doc, fontdesc, estrings, NULL, pdf_dict_get(ctx, dict, PDF_NAME(ToUnicode))); | |
| 1025 } | |
| 1026 fz_catch(ctx) | |
| 1027 { | |
| 1028 fz_rethrow_if(ctx, FZ_ERROR_TRYLATER); | |
| 1029 fz_rethrow_if(ctx, FZ_ERROR_SYSTEM); | |
| 1030 fz_report_error(ctx); | |
| 1031 fz_warn(ctx, "cannot load ToUnicode CMap"); | |
| 1032 } | |
| 1033 | |
| 1034 skip_encoding: | |
| 1035 | |
| 1036 /* Widths */ | |
| 1037 | |
| 1038 pdf_set_default_hmtx(ctx, fontdesc, fontdesc->missing_width); | |
| 1039 | |
| 1040 widths = pdf_dict_get(ctx, dict, PDF_NAME(Widths)); | |
| 1041 if (widths) | |
| 1042 { | |
| 1043 int first, last; | |
| 1044 | |
| 1045 first = pdf_dict_get_int(ctx, dict, PDF_NAME(FirstChar)); | |
| 1046 last = pdf_dict_get_int(ctx, dict, PDF_NAME(LastChar)); | |
| 1047 | |
| 1048 if (first < 0 || last > 255 || first > last) | |
| 1049 first = last = 0; | |
| 1050 | |
| 1051 for (i = 0; i < last - first + 1; i++) | |
| 1052 { | |
| 1053 int wid = pdf_array_get_int(ctx, widths, i); | |
| 1054 pdf_add_hmtx(ctx, fontdesc, i + first, i + first, wid); | |
| 1055 } | |
| 1056 } | |
| 1057 else | |
| 1058 { | |
| 1059 fz_ft_lock(ctx); | |
| 1060 has_lock = 1; | |
| 1061 for (i = 0; i < 256; i++) | |
| 1062 pdf_add_hmtx(ctx, fontdesc, i, i, ft_width(ctx, fontdesc, i)); | |
| 1063 fz_ft_unlock(ctx); | |
| 1064 has_lock = 0; | |
| 1065 } | |
| 1066 | |
| 1067 pdf_end_hmtx(ctx, fontdesc); | |
| 1068 } | |
| 1069 fz_catch(ctx) | |
| 1070 { | |
| 1071 if (has_lock) | |
| 1072 fz_ft_unlock(ctx); | |
| 1073 if (fontdesc && etable != fontdesc->cid_to_gid) | |
| 1074 fz_free(ctx, etable); | |
| 1075 pdf_drop_font(ctx, fontdesc); | |
| 1076 fz_rethrow(ctx); | |
| 1077 } | |
| 1078 return fontdesc; | |
| 1079 } | |
| 1080 /* Store hash callback: zeroes the 'pi' fields of the hash and returns 1. key_ is not read, so every key yields the same hash. */ | |
| 1081 static int | |
| 1082 hail_mary_make_hash_key(fz_context *ctx, fz_store_hash *hash, void *key_) | |
| 1083 { | |
| 1084 hash->u.pi.i = 0; | |
| 1085 hash->u.pi.ptr = NULL; | |
| 1086 return 1; | |
| 1087 } | |
| 1088 /* Store keep callback: hands the key pointer back unchanged; no reference is taken. */ | |
| 1089 static void * | |
| 1090 hail_mary_keep_key(fz_context *ctx, void *key) | |
| 1091 { | |
| 1092 return key; | |
| 1093 } | |
| 1094 /* Store drop callback: deliberately empty, there is nothing to release for this key. */ | |
| 1095 static void | |
| 1096 hail_mary_drop_key(fz_context *ctx, void *key) | |
| 1097 { | |
| 1098 } | |
| 1099 /* Store compare callback: returns non-zero only when both keys are the same pointer. */ | |
| 1100 static int | |
| 1101 hail_mary_cmp_key(fz_context *ctx, void *k0, void *k1) | |
| 1102 { | |
| 1103 return k0 == k1; | |
| 1104 } | |
| 1105 /* Store format callback: copies a fixed label into the buffer 's' of size 'n'; key_ is not read. */ | |
| 1106 static void | |
| 1107 hail_mary_format_key(fz_context *ctx, char *s, size_t n, void *key_) | |
| 1108 { | |
| 1109 fz_strlcpy(s, "(hail mary font)", n); | |
| 1110 } | |
| 1111 | |
| 1112 static int hail_mary_store_key; /* Dummy: never read, only its address is passed to the store as the key */ | |
| 1113 /* Store type that wires up the hail_mary_* callbacks above; the last slot is left NULL. */ | |
| 1114 static const fz_store_type hail_mary_store_type = | |
| 1115 { | |
| 1116 "hail-mary", | |
| 1117 hail_mary_make_hash_key, | |
| 1118 hail_mary_keep_key, | |
| 1119 hail_mary_drop_key, | |
| 1120 hail_mary_cmp_key, | |
| 1121 hail_mary_format_key, | |
| 1122 NULL | |
| 1123 }; | |
| 1124 /* Return the fallback ("hail mary") font descriptor: reuse the one held in the store under the fixed hail_mary_store_key, otherwise build it with pdf_load_simple_font on a NULL font dict and store it. */ | |
| 1125 pdf_font_desc * | |
| 1126 pdf_load_hail_mary_font(fz_context *ctx, pdf_document *doc) | |
| 1127 { | |
| 1128 pdf_font_desc *fontdesc; | |
| 1129 pdf_font_desc *existing; | |
| 1130 | |
| 1131 if ((fontdesc = fz_find_item(ctx, pdf_drop_font_imp, &hail_mary_store_key, &hail_mary_store_type)) != NULL) | |
| 1132 { | |
| 1133 return fontdesc; | |
| 1134 } | |
| 1135 | |
| 1136 /* FIXME: Get someone with a clue about fonts to fix this */ | |
| 1137 fontdesc = pdf_load_simple_font(ctx, doc, NULL); | |
| 1138 /* The lookup above just missed, so the store is not expected to hand back an existing entry. */ | |
| 1139 existing = fz_store_item(ctx, &hail_mary_store_key, fontdesc, fontdesc->size, &hail_mary_store_type); | |
| 1140 assert(existing == NULL); | |
| 1141 (void)existing; /* Silence warning in release builds */ | |
| 1142 | |
| 1143 return fontdesc; | |
| 1144 } | |
| 1145 | |
| 1146 /* | |
| 1147 * CID Fonts | |
| 1148 */ | |
| 1149 | |
| 1150 static pdf_font_desc * | |
| 1151 load_cid_font(fz_context *ctx, pdf_document *doc, pdf_obj *dict, pdf_obj *encoding, pdf_obj *to_unicode) | |
| 1152 { | |
| 1153 pdf_obj *widths; | |
| 1154 pdf_obj *descriptor; | |
| 1155 pdf_font_desc *fontdesc = NULL; | |
| 1156 fz_buffer *buf = NULL; | |
| 1157 pdf_cmap *cmap; | |
| 1158 FT_Face face; | |
| 1159 char collection[256]; | |
| 1160 const char *basefont; | |
| 1161 int i, k, fterr; | |
| 1162 pdf_obj *cidtogidmap; | |
| 1163 pdf_obj *obj; | |
| 1164 int dw; | |
| 1165 | |
| 1166 fz_var(fontdesc); | |
| 1167 fz_var(buf); | |
| 1168 | |
| 1169 fz_try(ctx) | |
| 1170 { | |
| 1171 /* Get font name and CID collection */ | |
| 1172 | |
| 1173 basefont = pdf_dict_get_name(ctx, dict, PDF_NAME(BaseFont)); | |
| 1174 | |
| 1175 { | |
| 1176 pdf_obj *cidinfo; | |
| 1177 const char *reg, *ord; | |
| 1178 | |
| 1179 cidinfo = pdf_dict_get(ctx, dict, PDF_NAME(CIDSystemInfo)); | |
| 1180 if (cidinfo) | |
| 1181 { | |
| 1182 reg = pdf_dict_get_string(ctx, cidinfo, PDF_NAME(Registry), NULL); | |
| 1183 ord = pdf_dict_get_string(ctx, cidinfo, PDF_NAME(Ordering), NULL); | |
| 1184 fz_snprintf(collection, sizeof collection, "%s-%s", reg, ord); | |
| 1185 } | |
| 1186 else | |
| 1187 { | |
| 1188 fz_warn(ctx, "CIDFont is missing CIDSystemInfo dictionary; assuming Adobe-Identity"); | |
| 1189 fz_strlcpy(collection, "Adobe-Identity", sizeof collection); | |
| 1190 } | |
| 1191 } | |
| 1192 | |
| 1193 /* Encoding */ | |
| 1194 | |
| 1195 if (pdf_is_name(ctx, encoding)) | |
| 1196 { | |
| 1197 cmap = pdf_load_system_cmap(ctx, pdf_to_name(ctx, encoding)); | |
| 1198 } | |
| 1199 else if (pdf_is_indirect(ctx, encoding)) | |
| 1200 { | |
| 1201 cmap = pdf_load_embedded_cmap(ctx, doc, encoding); | |
| 1202 } | |
| 1203 else | |
| 1204 { | |
| 1205 fz_throw(ctx, FZ_ERROR_SYNTAX, "font missing encoding"); | |
| 1206 } | |
| 1207 | |
| 1208 /* Load font file */ | |
| 1209 | |
| 1210 fontdesc = pdf_new_font_desc(ctx); | |
| 1211 | |
| 1212 fontdesc->encoding = cmap; | |
| 1213 fontdesc->size += pdf_cmap_size(ctx, fontdesc->encoding); | |
| 1214 | |
| 1215 pdf_set_font_wmode(ctx, fontdesc, pdf_cmap_wmode(ctx, fontdesc->encoding)); | |
| 1216 | |
| 1217 descriptor = pdf_dict_get(ctx, dict, PDF_NAME(FontDescriptor)); | |
| 1218 if (!descriptor) | |
| 1219 fz_throw(ctx, FZ_ERROR_SYNTAX, "missing font descriptor"); | |
| 1220 pdf_load_font_descriptor(ctx, doc, fontdesc, descriptor, collection, basefont, 1); | |
| 1221 | |
| 1222 face = fontdesc->font->ft_face; | |
| 1223 | |
| 1224 /* Apply encoding */ | |
| 1225 | |
| 1226 cidtogidmap = pdf_dict_get(ctx, dict, PDF_NAME(CIDToGIDMap)); | |
| 1227 if (pdf_is_stream(ctx, cidtogidmap)) | |
| 1228 { | |
| 1229 size_t z, len; | |
| 1230 unsigned char *data; | |
| 1231 | |
| 1232 buf = pdf_load_stream(ctx, cidtogidmap); | |
| 1233 | |
| 1234 len = fz_buffer_storage(ctx, buf, &data); | |
| 1235 fontdesc->cid_to_gid_len = len / 2; | |
| 1236 fontdesc->cid_to_gid = Memento_label(fz_malloc_array(ctx, fontdesc->cid_to_gid_len, unsigned short), "cid_to_gid_map"); | |
| 1237 fontdesc->size += fontdesc->cid_to_gid_len * sizeof(unsigned short); | |
| 1238 for (z = 0; z < fontdesc->cid_to_gid_len; z++) | |
| 1239 fontdesc->cid_to_gid[z] = (data[z * 2] << 8) + data[z * 2 + 1]; | |
| 1240 } | |
| 1241 else if (cidtogidmap && !pdf_name_eq(ctx, PDF_NAME(Identity), cidtogidmap)) | |
| 1242 { | |
| 1243 fz_warn(ctx, "ignoring unknown CIDToGIDMap entry"); | |
| 1244 } | |
| 1245 | |
| 1246 /* if font is external, cidtogidmap should not be identity */ | |
| 1247 /* so we map from cid to unicode and then map that through the (3 1) */ | |
| 1248 /* unicode cmap to get a glyph id */ | |
| 1249 else if (fontdesc->font->flags.ft_substitute) | |
| 1250 { | |
| 1251 fz_ft_lock(ctx); | |
| 1252 fterr = FT_Select_Charmap(face, ft_encoding_unicode); | |
| 1253 fz_ft_unlock(ctx); | |
| 1254 if (fterr) | |
| 1255 fz_throw(ctx, FZ_ERROR_SYNTAX, "no unicode cmap when emulating CID font: %s", ft_error_string(fterr)); | |
| 1256 | |
| 1257 if (!strcmp(collection, "Adobe-CNS1")) | |
| 1258 fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-CNS1-UCS2"); | |
| 1259 else if (!strcmp(collection, "Adobe-GB1")) | |
| 1260 fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2"); | |
| 1261 else if (!strcmp(collection, "Adobe-Japan1")) | |
| 1262 fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Japan1-UCS2"); | |
| 1263 else if (!strcmp(collection, "Adobe-Japan2")) | |
| 1264 fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Japan2-UCS2"); | |
| 1265 else if (!strcmp(collection, "Adobe-Korea1")) | |
| 1266 fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Korea1-UCS2"); | |
| 1267 } | |
| 1268 | |
| 1269 pdf_load_to_unicode(ctx, doc, fontdesc, NULL, collection, to_unicode); | |
| 1270 | |
| 1271 /* If we have an identity encoding, we're supposed to use the glyph ids directly. | |
| 1272 * If we only have a substitute font, that won't work. | |
| 1273 * Make a last ditch attempt by using | |
| 1274 * the ToUnicode table if it exists to map via the substitute font's cmap. */ | |
| 1275 if (strstr(fontdesc->encoding->cmap_name, "Identity-") && fontdesc->font->flags.ft_substitute) | |
| 1276 { | |
| 1277 if (!fontdesc->to_ttf_cmap) | |
| 1278 { | |
| 1279 if (fontdesc->to_unicode) | |
| 1280 { | |
| 1281 // Use ToUnicode from PDF file if possible. | |
| 1282 fontdesc->to_ttf_cmap = pdf_keep_cmap(ctx, fontdesc->to_unicode); | |
| 1283 } | |
| 1284 else | |
| 1285 { | |
| 1286 // Attempt a generic ToUnicode (default MacRoman ordering for TrueType) | |
| 1287 fontdesc->to_ttf_cmap = pdf_load_builtin_cmap(ctx, "TrueType-UCS2"); | |
| 1288 } | |
| 1289 } | |
| 1290 | |
| 1291 if (fontdesc->to_ttf_cmap) | |
| 1292 { | |
| 1293 fz_warn(ctx, "non-embedded font using identity encoding: %s (mapping via %s)", basefont, fontdesc->to_ttf_cmap->cmap_name); | |
| 1294 if (!fontdesc->to_unicode) | |
| 1295 fontdesc->to_unicode = pdf_keep_cmap(ctx, fontdesc->to_ttf_cmap); | |
| 1296 } | |
| 1297 else | |
| 1298 fz_warn(ctx, "non-embedded font using identity encoding: %s", basefont); | |
| 1299 } | |
| 1300 | |
| 1301 /* Horizontal */ | |
| 1302 | |
| 1303 dw = pdf_dict_get_int_default(ctx, dict, PDF_NAME(DW), 1000); | |
| 1304 pdf_set_default_hmtx(ctx, fontdesc, dw); | |
| 1305 | |
| 1306 widths = pdf_dict_get(ctx, dict, PDF_NAME(W)); | |
| 1307 if (widths) | |
| 1308 { | |
| 1309 int c0, c1, w, n, m; | |
| 1310 | |
| 1311 n = pdf_array_len(ctx, widths); | |
| 1312 for (i = 0; i < n; ) | |
| 1313 { | |
| 1314 c0 = pdf_array_get_int(ctx, widths, i); | |
| 1315 obj = pdf_array_get(ctx, widths, i + 1); | |
| 1316 if (pdf_is_array(ctx, obj)) | |
| 1317 { | |
| 1318 m = pdf_array_len(ctx, obj); | |
| 1319 for (k = 0; k < m; k++) | |
| 1320 { | |
| 1321 w = pdf_array_get_int(ctx, obj, k); | |
| 1322 pdf_add_hmtx(ctx, fontdesc, c0 + k, c0 + k, w); | |
| 1323 } | |
| 1324 i += 2; | |
| 1325 } | |
| 1326 else | |
| 1327 { | |
| 1328 c1 = pdf_to_int(ctx, obj); | |
| 1329 w = pdf_array_get_int(ctx, widths, i + 2); | |
| 1330 pdf_add_hmtx(ctx, fontdesc, c0, c1, w); | |
| 1331 i += 3; | |
| 1332 } | |
| 1333 } | |
| 1334 } | |
| 1335 | |
| 1336 pdf_end_hmtx(ctx, fontdesc); | |
| 1337 | |
| 1338 /* Vertical */ | |
| 1339 | |
| 1340 if (pdf_cmap_wmode(ctx, fontdesc->encoding) == 1) | |
| 1341 { | |
| 1342 int dw2y = 880; | |
| 1343 int dw2w = -1000; | |
| 1344 | |
| 1345 obj = pdf_dict_get(ctx, dict, PDF_NAME(DW2)); | |
| 1346 if (obj) | |
| 1347 { | |
| 1348 dw2y = pdf_array_get_int(ctx, obj, 0); | |
| 1349 dw2w = pdf_array_get_int(ctx, obj, 1); | |
| 1350 } | |
| 1351 | |
| 1352 pdf_set_default_vmtx(ctx, fontdesc, dw2y, dw2w); | |
| 1353 | |
| 1354 widths = pdf_dict_get(ctx, dict, PDF_NAME(W2)); | |
| 1355 if (widths) | |
| 1356 { | |
| 1357 int c0, c1, w, x, y, n; | |
| 1358 | |
| 1359 n = pdf_array_len(ctx, widths); | |
| 1360 for (i = 0; i < n; ) | |
| 1361 { | |
| 1362 c0 = pdf_array_get_int(ctx, widths, i); | |
| 1363 obj = pdf_array_get(ctx, widths, i + 1); | |
| 1364 if (pdf_is_array(ctx, obj)) | |
| 1365 { | |
| 1366 int m = pdf_array_len(ctx, obj); | |
| 1367 for (k = 0; k * 3 < m; k ++) | |
| 1368 { | |
| 1369 w = pdf_array_get_int(ctx, obj, k * 3 + 0); | |
| 1370 x = pdf_array_get_int(ctx, obj, k * 3 + 1); | |
| 1371 y = pdf_array_get_int(ctx, obj, k * 3 + 2); | |
| 1372 pdf_add_vmtx(ctx, fontdesc, c0 + k, c0 + k, x, y, w); | |
| 1373 } | |
| 1374 i += 2; | |
| 1375 } | |
| 1376 else | |
| 1377 { | |
| 1378 c1 = pdf_to_int(ctx, obj); | |
| 1379 w = pdf_array_get_int(ctx, widths, i + 2); | |
| 1380 x = pdf_array_get_int(ctx, widths, i + 3); | |
| 1381 y = pdf_array_get_int(ctx, widths, i + 4); | |
| 1382 pdf_add_vmtx(ctx, fontdesc, c0, c1, x, y, w); | |
| 1383 i += 5; | |
| 1384 } | |
| 1385 } | |
| 1386 } | |
| 1387 | |
| 1388 pdf_end_vmtx(ctx, fontdesc); | |
| 1389 } | |
| 1390 } | |
| 1391 fz_always(ctx) | |
| 1392 fz_drop_buffer(ctx, buf); | |
| 1393 fz_catch(ctx) | |
| 1394 { | |
| 1395 pdf_drop_font(ctx, fontdesc); | |
| 1396 fz_rethrow(ctx); | |
| 1397 } | |
| 1398 | |
| 1399 return fontdesc; | |
| 1400 } | |
| 1401 /* Load a composite (Type0) font: takes Encoding and ToUnicode from 'dict' and hands the first DescendantFonts entry to load_cid_font. Throws FZ_ERROR_SYNTAX when DescendantFonts is missing or the descendant's Subtype is neither CIDFontType0 nor CIDFontType2. */ | |
| 1402 static pdf_font_desc * | |
| 1403 pdf_load_type0_font(fz_context *ctx, pdf_document *doc, pdf_obj *dict) | |
| 1404 { | |
| 1405 pdf_obj *dfonts; | |
| 1406 pdf_obj *dfont; | |
| 1407 pdf_obj *subtype; | |
| 1408 pdf_obj *encoding; | |
| 1409 pdf_obj *to_unicode; | |
| 1410 | |
| 1411 dfonts = pdf_dict_get(ctx, dict, PDF_NAME(DescendantFonts)); | |
| 1412 if (!dfonts) | |
| 1413 fz_throw(ctx, FZ_ERROR_SYNTAX, "cid font is missing descendant fonts"); | |
| 1414 /* Only the first descendant font is used. */ | |
| 1415 dfont = pdf_array_get(ctx, dfonts, 0); | |
| 1416 /* Subtype comes from the descendant; Encoding and ToUnicode come from the Type0 dictionary itself. */ | |
| 1417 subtype = pdf_dict_get(ctx, dfont, PDF_NAME(Subtype)); | |
| 1418 encoding = pdf_dict_get(ctx, dict, PDF_NAME(Encoding)); | |
| 1419 to_unicode = pdf_dict_get(ctx, dict, PDF_NAME(ToUnicode)); | |
| 1420 /* Both CID font flavours go through the same loader; anything else is rejected. */ | |
| 1421 if (pdf_is_name(ctx, subtype) && pdf_name_eq(ctx, subtype, PDF_NAME(CIDFontType0))) | |
| 1422 return load_cid_font(ctx, doc, dfont, encoding, to_unicode); | |
| 1423 if (pdf_is_name(ctx, subtype) && pdf_name_eq(ctx, subtype, PDF_NAME(CIDFontType2))) | |
| 1424 return load_cid_font(ctx, doc, dfont, encoding, to_unicode); | |
| 1425 fz_throw(ctx, FZ_ERROR_SYNTAX, "unknown cid font type"); | |
| 1426 } | |
| 1427 | |
| 1428 /* | |
| 1429 * FontDescriptor | |
| 1430 */ | |
| 1431 /* Fill 'fontdesc' from the FontDescriptor dictionary 'dict': reads Flags, ItalicAngle, CapHeight, XHeight and MissingWidth, loads the font program (an embedded FontFile / FontFile2 / FontFile3 stream if present, otherwise a builtin or system font looked up by 'basefont' and 'collection'), then settles the ascent and descent values. */ | |
| 1432 static void | |
| 1433 pdf_load_font_descriptor(fz_context *ctx, pdf_document *doc, pdf_font_desc *fontdesc, pdf_obj *dict, | |
| 1434 const char *collection, const char *basefont, int iscidfont) | |
| 1435 { | |
| 1436 pdf_obj *obj1, *obj2, *obj3, *obj; | |
| 1437 const char *fontname; | |
| 1438 FT_Face face; | |
| 1439 | |
| 1440 /* Prefer BaseFont; don't bother with FontName */ | |
| 1441 fontname = basefont; | |
| 1442 | |
| 1443 fontdesc->flags = pdf_dict_get_int(ctx, dict, PDF_NAME(Flags)); | |
| 1444 fontdesc->italic_angle = pdf_dict_get_real(ctx, dict, PDF_NAME(ItalicAngle)); | |
| 1445 /* fontdesc->ascent and descent have already been set to sane defaults */ | |
| 1446 fontdesc->cap_height = pdf_dict_get_real(ctx, dict, PDF_NAME(CapHeight)); | |
| 1447 fontdesc->x_height = pdf_dict_get_real(ctx, dict, PDF_NAME(XHeight)); | |
| 1448 fontdesc->missing_width = pdf_dict_get_real(ctx, dict, PDF_NAME(MissingWidth)); | |
| 1449 /* Use the first of FontFile, FontFile2, FontFile3 that is present. */ | |
| 1450 obj1 = pdf_dict_get(ctx, dict, PDF_NAME(FontFile)); | |
| 1451 obj2 = pdf_dict_get(ctx, dict, PDF_NAME(FontFile2)); | |
| 1452 obj3 = pdf_dict_get(ctx, dict, PDF_NAME(FontFile3)); | |
| 1453 obj = obj1 ? obj1 : obj2 ? obj2 : obj3; | |
| 1454 /* An embedded font is attempted only when that entry is an indirect reference; if loading it fails (other than TRYLATER / SYSTEM errors) the code falls back to the same builtin/system lookup as the non-embedded case. */ | |
| 1455 if (pdf_is_indirect(ctx, obj)) | |
| 1456 { | |
| 1457 fz_try(ctx) | |
| 1458 { | |
| 1459 pdf_load_embedded_font(ctx, doc, fontdesc, fontname, obj); | |
| 1460 } | |
| 1461 fz_catch(ctx) | |
| 1462 { | |
| 1463 fz_rethrow_if(ctx, FZ_ERROR_TRYLATER); | |
| 1464 fz_rethrow_if(ctx, FZ_ERROR_SYSTEM); | |
| 1465 fz_report_error(ctx); | |
| 1466 fz_warn(ctx, "ignored error when loading embedded font; attempting to load system font"); | |
| 1467 if (!iscidfont && fontname != pdf_clean_font_name(fontname)) | |
| 1468 pdf_load_builtin_font(ctx, fontdesc, fontname, 1); | |
| 1469 else | |
| 1470 pdf_load_system_font(ctx, fontdesc, fontname, collection); | |
| 1471 } | |
| 1472 } | |
| 1473 else | |
| 1474 { | |
| 1475 if (!iscidfont && fontname != pdf_clean_font_name(fontname)) | |
| 1476 pdf_load_builtin_font(ctx, fontdesc, fontname, 1); | |
| 1477 else | |
| 1478 pdf_load_system_font(ctx, fontdesc, fontname, collection); | |
| 1479 } | |
| 1480 | |
| 1481 /* Check for DynaLab fonts that must use hinting */ | |
| 1482 face = fontdesc->font->ft_face; | |
| 1483 if (ft_kind(ctx, face) == TRUETYPE) | |
| 1484 { | |
| 1485 /* FreeType's own 'tricky' font detection needs a bit of help */ | |
| 1486 if (is_dynalab(fontdesc->font->name)) | |
| 1487 face->face_flags |= FT_FACE_FLAG_TRICKY; | |
| 1488 | |
| 1489 fontdesc->ascent = 1000.0f * face->ascender / face->units_per_EM; | |
| 1490 | |
| 1491 fontdesc->descent = 1000.0f * face->descender / face->units_per_EM; | |
| 1492 } | |
| 1493 | |
| 1494 /* Prefer FontDescriptor Ascent/Descent values to embedded font's */ | |
| 1495 fontdesc->ascent = pdf_dict_get_real_default(ctx, dict, PDF_NAME(Ascent), fontdesc->ascent); | |
| 1496 fontdesc->descent = pdf_dict_get_real_default(ctx, dict, PDF_NAME(Descent), fontdesc->descent); | |
| 1497 /* Allow for naughty producers that give us a positive descent. */ | |
| 1498 if (fontdesc->descent > 0) | |
| 1499 fontdesc->descent = -fontdesc->descent; | |
| 1500 /* Implausible values are replaced on the fz_font by 0.8 and -0.2; otherwise the descriptor values are divided by 1000. */ | |
| 1501 if (fontdesc->ascent <= 0 || fontdesc->ascent > FZ_MAX_TRUSTWORTHY_ASCENT * 1000 || | |
| 1502 fontdesc->descent < FZ_MAX_TRUSTWORTHY_DESCENT * 1000) | |
| 1503 { | |
| 1504 fz_warn(ctx, "bogus font ascent/descent values (%g / %g)", fontdesc->ascent, fontdesc->descent); | |
| 1505 fontdesc->font->ascender = 0.8f; | |
| 1506 fontdesc->font->descender = -0.2f; | |
| 1507 fontdesc->font->ascdesc_src = FZ_ASCDESC_DEFAULT; | |
| 1508 } | |
| 1509 else | |
| 1510 { | |
| 1511 fontdesc->font->ascender = fontdesc->ascent / 1000.0f; | |
| 1512 fontdesc->font->descender = fontdesc->descent / 1000.0f; | |
| 1513 fontdesc->font->ascdesc_src = FZ_ASCDESC_FROM_FONT; | |
| 1514 } | |
| 1515 } | |
| 1516 /* Build font->width_table, indexed by glyph id, from the horizontal metrics in fontdesc->hmtx; glyph ids that receive no entry get the default width fontdesc->dhmtx.w. Also adds the table's byte size to fontdesc->size. */ | |
| 1517 static void | |
| 1518 pdf_make_width_table(fz_context *ctx, pdf_font_desc *fontdesc) | |
| 1519 { | |
| 1520 fz_font *font = fontdesc->font; | |
| 1521 int i, k, n, cid, gid; | |
| 1522 /* Pass 1: find the largest glyph id reachable from any hmtx range, to size the table. */ | |
| 1523 n = 0; | |
| 1524 for (i = 0; i < fontdesc->hmtx_len; i++) | |
| 1525 { | |
| 1526 for (k = fontdesc->hmtx[i].lo; k <= fontdesc->hmtx[i].hi; k++) | |
| 1527 { | |
| 1528 cid = pdf_lookup_cmap(fontdesc->encoding, k); | |
| 1529 gid = pdf_font_cid_to_gid(ctx, fontdesc, cid); | |
| 1530 if (gid > n) | |
| 1531 n = gid; | |
| 1532 } | |
| 1533 } | |
| 1534 | |
| 1535 font->width_count = n + 1; | |
| 1536 font->width_table = Memento_label(fz_malloc_array(ctx, font->width_count, short), "font_widths"); | |
| 1537 fontdesc->size += font->width_count * sizeof(short); | |
| 1538 /* -1 marks slots that have not been given a width yet. */ | |
| 1539 font->width_default = fontdesc->dhmtx.w; | |
| 1540 for (i = 0; i < font->width_count; i++) | |
| 1541 font->width_table[i] = -1; | |
| 1542 /* Pass 2: record widths; when several codes map to the same glyph id the larger width is kept. */ | |
| 1543 for (i = 0; i < fontdesc->hmtx_len; i++) | |
| 1544 { | |
| 1545 for (k = fontdesc->hmtx[i].lo; k <= fontdesc->hmtx[i].hi; k++) | |
| 1546 { | |
| 1547 cid = pdf_lookup_cmap(fontdesc->encoding, k); | |
| 1548 gid = pdf_font_cid_to_gid(ctx, fontdesc, cid); | |
| 1549 if (gid >= 0 && gid < font->width_count) | |
| 1550 font->width_table[gid] = fz_maxi(fontdesc->hmtx[i].w, font->width_table[gid]); | |
| 1551 } | |
| 1552 } | |
| 1553 /* Slots still marked -1 fall back to the default width. */ | |
| 1554 for (i = 0; i < font->width_count; i++) | |
| 1555 if (font->width_table[i] == -1) | |
| 1556 font->width_table[i] = font->width_default; | |
| 1557 } | |
| 1558 /* Load the font described by 'dict', returning a stored descriptor when pdf_find_item has one. The loader is picked by Subtype, with fallbacks guessed from the presence of CharProcs or DescendantFonts; the result gets a glyph width table and is stored under 'dict'. 'rdb' is only passed on to the Type3 loader. */ | |
| 1559 pdf_font_desc * | |
| 1560 pdf_load_font(fz_context *ctx, pdf_document *doc, pdf_obj *rdb, pdf_obj *dict) | |
| 1561 { | |
| 1562 pdf_obj *subtype; | |
| 1563 pdf_obj *dfonts; | |
| 1564 pdf_obj *charprocs; | |
| 1565 pdf_font_desc *fontdesc = NULL; | |
| 1566 int type3 = 0; | |
| 1567 /* A stored descriptor still flagged t3loading means this call was reached while that font's own glyphs were being loaded. */ | |
| 1568 if ((fontdesc = pdf_find_item(ctx, pdf_drop_font_imp, dict)) != NULL) | |
| 1569 { | |
| 1570 if (fontdesc->t3loading) | |
| 1571 { | |
| 1572 pdf_drop_font(ctx, fontdesc); | |
| 1573 fz_throw(ctx, FZ_ERROR_SYNTAX, "recursive type3 font"); | |
| 1574 } | |
| 1575 return fontdesc; | |
| 1576 } | |
| 1577 | |
| 1578 subtype = pdf_dict_get(ctx, dict, PDF_NAME(Subtype)); | |
| 1579 dfonts = pdf_dict_get(ctx, dict, PDF_NAME(DescendantFonts)); | |
| 1580 charprocs = pdf_dict_get(ctx, dict, PDF_NAME(CharProcs)); | |
| 1581 /* Dispatch on Subtype first; an unrecognised Subtype falls back to guessing from CharProcs, then DescendantFonts, then a simple font. */ | |
| 1582 if (pdf_name_eq(ctx, subtype, PDF_NAME(Type0))) | |
| 1583 fontdesc = pdf_load_type0_font(ctx, doc, dict); | |
| 1584 else if (pdf_name_eq(ctx, subtype, PDF_NAME(Type1))) | |
| 1585 fontdesc = pdf_load_simple_font(ctx, doc, dict); | |
| 1586 else if (pdf_name_eq(ctx, subtype, PDF_NAME(MMType1))) | |
| 1587 fontdesc = pdf_load_simple_font(ctx, doc, dict); | |
| 1588 else if (pdf_name_eq(ctx, subtype, PDF_NAME(TrueType))) | |
| 1589 fontdesc = pdf_load_simple_font(ctx, doc, dict); | |
| 1590 else if (pdf_name_eq(ctx, subtype, PDF_NAME(Type3))) | |
| 1591 { | |
| 1592 fontdesc = pdf_load_type3_font(ctx, doc, rdb, dict); | |
| 1593 type3 = 1; | |
| 1594 } | |
| 1595 else if (charprocs) | |
| 1596 { | |
| 1597 fz_warn(ctx, "unknown font format, guessing type3."); | |
| 1598 fontdesc = pdf_load_type3_font(ctx, doc, rdb, dict); | |
| 1599 type3 = 1; | |
| 1600 } | |
| 1601 else if (dfonts) | |
| 1602 { | |
| 1603 fz_warn(ctx, "unknown font format, guessing type0."); | |
| 1604 fontdesc = pdf_load_type0_font(ctx, doc, dict); | |
| 1605 } | |
| 1606 else | |
| 1607 { | |
| 1608 fz_warn(ctx, "unknown font format, guessing type1 or truetype."); | |
| 1609 fontdesc = pdf_load_simple_font(ctx, doc, dict); | |
| 1610 } | |
| 1611 | |
| 1612 fz_try(ctx) | |
| 1613 { | |
| 1614 /* Create glyph width table for stretching substitute fonts and text extraction. */ | |
| 1615 pdf_make_width_table(ctx, fontdesc); | |
| 1616 /* Stored before the Type3 glyphs are loaded, so a recursive pdf_load_font finds this entry together with its t3loading flag. */ | |
| 1617 pdf_store_item(ctx, dict, fontdesc, fontdesc->size); | |
| 1618 | |
| 1619 /* Load CharProcs */ | |
| 1620 if (type3) | |
| 1621 { | |
| 1622 fontdesc->t3loading = 1; | |
| 1623 fz_try(ctx) | |
| 1624 pdf_load_type3_glyphs(ctx, doc, fontdesc); | |
| 1625 fz_always(ctx) | |
| 1626 fontdesc->t3loading = 0; | |
| 1627 fz_catch(ctx) | |
| 1628 { | |
| 1629 pdf_remove_item(ctx, fontdesc->storable.drop, dict); | |
| 1630 fz_rethrow(ctx); | |
| 1631 } | |
| 1632 } | |
| 1633 } | |
| 1634 fz_catch(ctx) | |
| 1635 { | |
| 1636 pdf_drop_font(ctx, fontdesc); | |
| 1637 fz_rethrow(ctx); | |
| 1638 } | |
| 1639 | |
| 1640 return fontdesc; | |
| 1641 } | |
| 1642 /* Write a human-readable dump of 'fontdesc' to 'out': font kind, writing mode, the default and per-range horizontal widths, and the vertical metrics when wmode is set. The dump is wrapped in "fontdesc {" and a closing "}" line. */ | |
| 1643 void | |
| 1644 pdf_print_font(fz_context *ctx, fz_output *out, pdf_font_desc *fontdesc) | |
| 1645 { | |
| 1646 int i; | |
| 1647 | |
| 1648 fz_write_printf(ctx, out, "fontdesc {\n"); | |
| 1649 | |
| 1650 if (fontdesc->font->ft_face) | |
| 1651 fz_write_printf(ctx, out, "\tfreetype font\n"); | |
| 1652 if (fontdesc->font->t3procs) | |
| 1653 fz_write_printf(ctx, out, "\ttype3 font\n"); | |
| 1654 | |
| 1655 fz_write_printf(ctx, out, "\twmode %d\n", fontdesc->wmode); | |
| 1656 fz_write_printf(ctx, out, "\tDW %d\n", fontdesc->dhmtx.w); | |
| 1657 | |
| 1658 fz_write_printf(ctx, out, "\tW {\n"); | |
| 1659 for (i = 0; i < fontdesc->hmtx_len; i++) | |
| 1660 fz_write_printf(ctx, out, "\t\t<%04x> <%04x> %d\n", | |
| 1661 fontdesc->hmtx[i].lo, fontdesc->hmtx[i].hi, fontdesc->hmtx[i].w); | |
| 1662 fz_write_printf(ctx, out, "\t}\n"); | |
| 1663 | |
| 1664 if (fontdesc->wmode) | |
| 1665 { | |
| 1666 fz_write_printf(ctx, out, "\tDW2 [%d %d]\n", fontdesc->dvmtx.y, fontdesc->dvmtx.w); | |
| 1667 fz_write_printf(ctx, out, "\tW2 {\n"); | |
| 1668 for (i = 0; i < fontdesc->vmtx_len; i++) | |
| 1669 fz_write_printf(ctx, out, "\t\t<%04x> <%04x> %d %d %d\n", fontdesc->vmtx[i].lo, fontdesc->vmtx[i].hi, | |
| 1670 fontdesc->vmtx[i].x, fontdesc->vmtx[i].y, fontdesc->vmtx[i].w); | |
| 1671 fz_write_printf(ctx, out, "\t}\n"); | |
| 1672 } | |
| 1673 /* Close the block opened by "fontdesc {" so the dump is balanced like the nested W and W2 blocks. */ | |
| 1674 fz_write_printf(ctx, out, "}\n"); | |
| 1675 } |
