Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/source/fitz/subset-ttf.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 // Copyright (C) 2004-2025 Artifex Software, Inc. | |
| 2 // | |
| 3 // This file is part of MuPDF. | |
| 4 // | |
| 5 // MuPDF is free software: you can redistribute it and/or modify it under the | |
| 6 // terms of the GNU Affero General Public License as published by the Free | |
| 7 // Software Foundation, either version 3 of the License, or (at your option) | |
| 8 // any later version. | |
| 9 // | |
| 10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY | |
| 11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
| 12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more | |
| 13 // details. | |
| 14 // | |
| 15 // You should have received a copy of the GNU Affero General Public License | |
| 16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> | |
| 17 // | |
| 18 // Alternative licensing terms are available from the licensor. | |
| 19 // For commercial licensing, see <https://www.artifex.com/> or contact | |
| 20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, | |
| 21 // CA 94129, USA, for further information. | |
| 22 | |
| 23 #include "mupdf/fitz.h" | |
| 24 | |
| 25 /* | |
| 26 For the purposes of this code, and to save my tiny brain from | |
| 27 overload, we will adopt the following notation: | |
| 28 | |
| 29 1) The PDF file contains bytes of data. These bytes are looked | |
| 30 up in the MuPDF font handling to resolve to 'glyph ids' (gids). | |
| 31 These account for all the different encodings etc in use, | |
| 32 including the 'cmap' table within the font. | |
| 33 | |
| 34 2) We are given the list of gids that are used in the document. | |
| 35 We arrange to keep any entries in the cmap or post tables that | |
| 36 maps to these gids. | |
| 37 | |
| 38 We map the gids to the bottom of the range. This means that the | |
| 39 cmap and post tables need to be updated. | |
| 40 | |
| 41 A similar optimisation would be to compress the range of cids | |
| 42 used to a prefix of the range used. This would mean that the | |
| 43 calling code needs to rewrite the data within the PDF file - | |
| 44 both in terms of the strings used with the PDF streams, and in | |
| 45 terms of the ToUnicode tables there (and the Widths etc). | |
| 46 | |
| 47 For now, we'll ignore this optimisation. | |
| 48 | |
| 49 Possibly, in the case of 'Identity' ToUnicode mappings we | |
| 50 wouldn't actually want to do this range compression? It'd only | |
| 51 make the file larger. | |
| 52 */ | |
| 53 | |
/*
	A cmap subtable loaded into a flat code -> gid lookup.

	gid[] is declared with 256 entries; the loaders in this file
	allocate additional trailing space when they need more, and
	record the usable entry count in max.
*/
typedef struct
{
	uint16_t pid;	/* platform id the subtable was selected by */
	uint16_t psid;	/* platform specific id the subtable was selected by */

	uint32_t max;	/* number of usable entries in gid[] */
	uint16_t gid[256];
} encoding_t;
| 62 | |
| 63 typedef struct | |
| 64 { | |
| 65 uint32_t tag; | |
| 66 uint32_t checksum; | |
| 67 fz_buffer *tab; | |
| 68 } tagged_table_t; | |
| 69 | |
| 70 typedef struct | |
| 71 { | |
| 72 int is_otf; | |
| 73 int symbolic; | |
| 74 encoding_t *encoding; | |
| 75 uint16_t orig_num_glyphs; | |
| 76 uint16_t new_num_glyphs; | |
| 77 uint16_t index_to_loc_format; | |
| 78 uint8_t *index_to_loc_formatp; | |
| 79 uint16_t orig_num_long_hor_metrics; | |
| 80 uint16_t new_num_long_hor_metrics; | |
| 81 | |
| 82 /* Pointer to the old tables (in the tagged table below) */ | |
| 83 uint8_t *loca; | |
| 84 size_t *loca_len; | |
| 85 uint8_t *maxp; | |
| 86 | |
| 87 /* Maps from old gid to new gid */ | |
| 88 uint16_t *gid_renum; | |
| 89 | |
| 90 int max; | |
| 91 int len; | |
| 92 tagged_table_t *table; | |
| 93 } ttf_t; | |
| 94 | |
| 95 static uint32_t | |
| 96 checksum(fz_buffer *buf) | |
| 97 { | |
| 98 size_t i; | |
| 99 const uint8_t *d = (const uint8_t *)buf->data; | |
| 100 uint32_t cs = 0; | |
| 101 | |
| 102 for (i = buf->len>>2; i > 0; i--) | |
| 103 { | |
| 104 cs += d[0]<<24; | |
| 105 cs += d[1]<<16; | |
| 106 cs += d[2]<<8; | |
| 107 cs += d[3]; | |
| 108 d += 4; | |
| 109 } | |
| 110 i = buf->len - (buf->len & ~3); | |
| 111 switch (i) | |
| 112 { | |
| 113 case 3: | |
| 114 cs += d[2]<<8; | |
| 115 /* fallthrough */ | |
| 116 case 2: | |
| 117 cs += d[1]<<16; | |
| 118 /* fallthrough */ | |
| 119 case 1: | |
| 120 cs += d[0]<<24; | |
| 121 default: | |
| 122 break; | |
| 123 } | |
| 124 | |
| 125 return cs; | |
| 126 } | |
| 127 | |
| 128 static uint32_t | |
| 129 find_table(fz_context *ctx, fz_stream *stm, uint32_t tag, uint32_t *len) | |
| 130 { | |
| 131 int num_tables; | |
| 132 int i; | |
| 133 | |
| 134 fz_seek(ctx, stm, 4, SEEK_SET); | |
| 135 num_tables = fz_read_int16(ctx, stm); | |
| 136 fz_seek(ctx, stm, 12, SEEK_SET); | |
| 137 | |
| 138 for (i = 0; i < num_tables; i++) | |
| 139 { | |
| 140 uint32_t t = fz_read_uint32(ctx, stm); | |
| 141 uint32_t cs = fz_read_uint32(ctx, stm); | |
| 142 uint32_t off = fz_read_uint32(ctx, stm); | |
| 143 (void) cs; /* UNUSED */ | |
| 144 *len = fz_read_uint32(ctx, stm); | |
| 145 if (t == tag) | |
| 146 return off; | |
| 147 } | |
| 148 | |
| 149 return 0; | |
| 150 } | |
| 151 | |
| 152 static fz_buffer * | |
| 153 read_table(fz_context *ctx, fz_stream *stm, uint32_t tag, int compulsory) | |
| 154 { | |
| 155 uint32_t size; | |
| 156 uint32_t off = find_table(ctx, stm, tag, &size); | |
| 157 fz_buffer *buf; | |
| 158 | |
| 159 if (off == 0) | |
| 160 { | |
| 161 if (compulsory) | |
| 162 fz_throw(ctx, FZ_ERROR_FORMAT, "Required %c%c%c%c table missing", tag>>24, (tag>>16)&0xff, (tag>>8)&0xff, tag & 0xff); | |
| 163 return NULL; | |
| 164 } | |
| 165 | |
| 166 fz_seek(ctx, stm, off, SEEK_SET); | |
| 167 buf = fz_new_buffer(ctx, size); | |
| 168 | |
| 169 fz_try(ctx) | |
| 170 { | |
| 171 fz_read(ctx, stm, buf->data, size); | |
| 172 buf->len = size; | |
| 173 } | |
| 174 fz_catch(ctx) | |
| 175 { | |
| 176 fz_drop_buffer(ctx, buf); | |
| 177 fz_rethrow(ctx); | |
| 178 } | |
| 179 | |
| 180 return buf; | |
| 181 } | |
| 182 | |
/*
	Pack the first four characters of s into a uint32_t, first
	character in the most significant byte. The argument is
	parenthesised so that expressions such as TAG(p + 2) work, and
	each byte is widened to uint32_t before shifting.
*/
#define TAG(s) \
	( (((uint32_t)(uint8_t)(s)[0])<<24) | \
	  (((uint32_t)(uint8_t)(s)[1])<<16) | \
	  (((uint32_t)(uint8_t)(s)[2])<<8) | \
	  (((uint32_t)(uint8_t)(s)[3])))
| 188 | |
| 189 static void | |
| 190 add_table(fz_context *ctx, ttf_t *ttf, uint32_t tag, fz_buffer *tab) | |
| 191 { | |
| 192 fz_try(ctx) | |
| 193 { | |
| 194 if (ttf->max == ttf->len) | |
| 195 { | |
| 196 int n = ttf->max * 2; | |
| 197 if (n == 0) | |
| 198 n = 16; | |
| 199 ttf->table = fz_realloc(ctx, ttf->table, sizeof(*ttf->table) * n); | |
| 200 ttf->max = n; | |
| 201 } | |
| 202 | |
| 203 ttf->table[ttf->len].tag = tag; | |
| 204 ttf->table[ttf->len].tab = tab; | |
| 205 ttf->len++; | |
| 206 } | |
| 207 fz_catch(ctx) | |
| 208 { | |
| 209 fz_drop_buffer(ctx, tab); | |
| 210 fz_rethrow(ctx); | |
| 211 } | |
| 212 } | |
| 213 | |
| 214 static void | |
| 215 copy_table(fz_context *ctx, ttf_t *ttf, fz_stream *stm, uint32_t tag, int compulsory) | |
| 216 { | |
| 217 fz_buffer *t; | |
| 218 | |
| 219 t = read_table(ctx, stm, tag, compulsory); | |
| 220 if (t) | |
| 221 add_table(ctx, ttf, tag, t); | |
| 222 } | |
| 223 | |
| 224 static int | |
| 225 tabcmp(const void *a_, const void *b_) | |
| 226 { | |
| 227 const tagged_table_t *a = (const tagged_table_t *)a_; | |
| 228 const tagged_table_t *b = (const tagged_table_t *)b_; | |
| 229 | |
| 230 return (a->tag - b->tag); | |
| 231 } | |
| 232 | |
| 233 static void | |
| 234 sort_tables(fz_context *ctx, ttf_t *ttf) | |
| 235 { | |
| 236 /* Avoid scanbuild/coverity false warning with this unnecessary test */ | |
| 237 if (ttf->table == NULL || ttf->len == 0) | |
| 238 return; | |
| 239 qsort(ttf->table, ttf->len, sizeof(tagged_table_t), tabcmp); | |
| 240 } | |
| 241 | |
| 242 static void | |
| 243 checksum_tables(fz_context *ctx, ttf_t *ttf) | |
| 244 { | |
| 245 int i; | |
| 246 | |
| 247 for (i = 0; i < ttf->len; i++) | |
| 248 ttf->table[i].checksum = checksum(ttf->table[i].tab); | |
| 249 } | |
| 250 | |
| 251 static void | |
| 252 write_tables(fz_context *ctx, ttf_t *ttf, fz_output *out) | |
| 253 { | |
| 254 int i = 0; | |
| 255 uint32_t offset; | |
| 256 | |
| 257 /* scalar type - TTF for now - may need to cope with other types later. */ | |
| 258 if (ttf->is_otf) | |
| 259 fz_write_int32_be(ctx, out, 0x4f54544f); | |
| 260 else | |
| 261 fz_write_int32_be(ctx, out, 0x00010000); | |
| 262 | |
| 263 /* number of tables */ | |
| 264 fz_write_uint16_be(ctx, out, ttf->len); | |
| 265 | |
| 266 while (1<<(i+1) <= ttf->len) | |
| 267 i++; | |
| 268 | |
| 269 /* searchRange */ | |
| 270 fz_write_uint16_be(ctx, out, (1<<i)<<4); | |
| 271 | |
| 272 /* entrySelector */ | |
| 273 fz_write_uint16_be(ctx, out, i); | |
| 274 | |
| 275 /* rangeShift*/ | |
| 276 fz_write_uint16_be(ctx, out, (ttf->len - (1<<i))<<4); | |
| 277 | |
| 278 /* Table directory */ | |
| 279 offset = 12 + ttf->len * 16; | |
| 280 for (i = 0; i < ttf->len; i++) | |
| 281 { | |
| 282 fz_write_uint32_be(ctx, out, ttf->table[i].tag); | |
| 283 fz_write_uint32_be(ctx, out, ttf->table[i].checksum); | |
| 284 fz_write_uint32_be(ctx, out, offset); | |
| 285 fz_write_uint32_be(ctx, out, (uint32_t)ttf->table[i].tab->len); | |
| 286 offset += (uint32_t)ttf->table[i].tab->len; | |
| 287 } | |
| 288 | |
| 289 /* Now the tables in turn */ | |
| 290 for (i = 0; i < ttf->len; i++) | |
| 291 { | |
| 292 fz_write_buffer(ctx, out, ttf->table[i].tab); | |
| 293 } | |
| 294 } | |
| 295 | |
| 296 static void | |
| 297 fix_checksum(fz_context *ctx, fz_buffer *buf) | |
| 298 { | |
| 299 uint8_t *data; | |
| 300 uint32_t sum = 0; | |
| 301 size_t len = fz_buffer_storage(ctx, buf, &data); | |
| 302 uint32_t namesize; | |
| 303 fz_stream *stm = fz_open_buffer(ctx, buf); | |
| 304 uint32_t csumpos = find_table(ctx, stm, TAG("head"), &namesize) + 8; | |
| 305 | |
| 306 (void) len; // UNUSED | |
| 307 | |
| 308 fz_drop_stream(ctx, stm); | |
| 309 | |
| 310 /* First off, blat the old checksum */ | |
| 311 memset(data+csumpos, 0, 4); | |
| 312 | |
| 313 sum = checksum(buf); | |
| 314 sum = 0xb1b0afba-sum; | |
| 315 | |
| 316 /* Insert it. */ | |
| 317 data[csumpos] = sum>>24; | |
| 318 data[csumpos+1] = sum>>16; | |
| 319 data[csumpos+2] = sum>>8; | |
| 320 data[csumpos+3] = sum; | |
| 321 } | |
| 322 | |
/* The six 16 bit fields of one 12 byte name record, in file order. */
typedef struct
{
	uint16_t platform_id;
	uint16_t platform_specific_id;
	uint16_t language_id;
	uint16_t name_id;
	uint16_t len;		/* length of the string */
	uint16_t offset;	/* offset of the string */
} name_record_t;
| 332 | |
/* Read a big-endian 32 bit value from the 4 bytes at d. */
static uint32_t get32(const uint8_t *d)
{
	/* Widen before shifting so that a top byte of 0x80 or more
	 * does not overflow a signed int. */
	return ((uint32_t)d[0]<<24)|((uint32_t)d[1]<<16)|((uint32_t)d[2]<<8)|(uint32_t)d[3];
}
| 337 | |
/* Read a big-endian 16 bit value from the 2 bytes at d. */
static uint32_t get16(const uint8_t *d)
{
	uint32_t hi = d[0];
	uint32_t lo = d[1];

	return (hi << 8) | lo;
}
| 342 | |
/* Store v as a big-endian 32 bit value in the 4 bytes at d. */
static void put32(uint8_t *d, uint32_t v)
{
	d[3] = (uint8_t)v;
	d[2] = (uint8_t)(v >> 8);
	d[1] = (uint8_t)(v >> 16);
	d[0] = (uint8_t)(v >> 24);
}
| 350 | |
/* Store the low 16 bits of v, big-endian, in the 2 bytes at d. */
static void put16(uint8_t *d, uint32_t v)
{
	d[1] = (uint8_t)v;
	d[0] = (uint8_t)(v >> 8);
}
| 356 | |
/* A growable list of pointers into a single block of memory. */
typedef struct
{
	/* The block the pointers refer into, and its size. The list
	 * operations themselves don't need these; they are kept as the
	 * bounds for every offset used within the list. */
	uint8_t *block;
	size_t block_len;

	uint32_t len;	/* number of pointers in use */
	uint32_t max;	/* number of pointers allocated */
	uint8_t **ptr;
} ptr_list_t;
| 369 | |
| 370 static void | |
| 371 ptr_list_add(fz_context *ctx, ptr_list_t *pl, uint8_t *ptr) | |
| 372 { | |
| 373 if (pl->len == pl->max) | |
| 374 { | |
| 375 int n = pl->max * 2; | |
| 376 if (n == 0) | |
| 377 n = 32; | |
| 378 pl->ptr = fz_realloc(ctx, pl->ptr, sizeof(*pl->ptr) * n); | |
| 379 pl->max = n; | |
| 380 } | |
| 381 pl->ptr[pl->len++] = ptr; | |
| 382 } | |
| 383 | |
| 384 typedef int (cmp_t)(const uint8_t **a, const uint8_t **b); | |
| 385 typedef int (void_cmp_t)(const void *, const void *); | |
| 386 | |
| 387 static void | |
| 388 ptr_list_sort(fz_context *ctx, ptr_list_t *pl, cmp_t *cmp) | |
| 389 { | |
| 390 /* Avoid scanbuild/coverity false warning with this unnecessary test */ | |
| 391 if (pl->ptr == NULL || pl->len == 0) | |
| 392 return; | |
| 393 qsort(pl->ptr, pl->len, sizeof(*pl->ptr), (void_cmp_t *)cmp); | |
| 394 } | |
| 395 | |
| 396 static void | |
| 397 drop_ptr_list(fz_context *ctx, ptr_list_t *pl) | |
| 398 { | |
| 399 fz_free(ctx, pl->ptr); | |
| 400 } | |
| 401 | |
| 402 /* return 1 to keep, 0 to drop. */ | |
| 403 typedef int (filter_t)(const uint8_t *ptr, const uint8_t *blk, size_t len); | |
| 404 | |
| 405 /* This makes a pointer list from a filtered block, moving the underlying data as it filters. */ | |
| 406 static void | |
| 407 ptr_list_compact(fz_context *ctx, ptr_list_t *pl, filter_t *fil, uint8_t *base, int n, size_t eltsize, uint8_t *block, size_t block_len) | |
| 408 { | |
| 409 int i; | |
| 410 uint8_t *s = base; | |
| 411 uint8_t *d = base; | |
| 412 | |
| 413 pl->block = block; | |
| 414 pl->block_len = block_len; | |
| 415 | |
| 416 if (base < block || (size_t)(base - block) > block_len || (size_t)(base - block) + n * eltsize >= block_len) | |
| 417 fz_throw(ctx, FZ_ERROR_FORMAT, "Ptr List creation failed"); | |
| 418 | |
| 419 for (i = 0; i < n; i++) | |
| 420 { | |
| 421 if (fil(s, block, block_len)) | |
| 422 { | |
| 423 ptr_list_add(ctx, pl, d); | |
| 424 if (s != d) | |
| 425 memmove(d, s, eltsize); | |
| 426 d += eltsize; | |
| 427 } | |
| 428 s += eltsize; | |
| 429 } | |
| 430 } | |
| 431 | |
/* Comparator for name records: orders them by the 16 bit length field
 * at byte 8 of each record, largest first. */
static int
names_by_size(const uint8_t **a, const uint8_t **b)
{
	uint32_t len_a = get16(*a + 8);
	uint32_t len_b = get16(*b + 8);

	return len_b - len_a;
}
| 437 | |
/* Filter deciding which name records to keep. Always answers "keep". */
static int
filter_name_tables(const uint8_t *ptr, const uint8_t *block, size_t block_len)
{
	/* FIXME: For now, we keep everything. */
	(void) ptr;
	(void) block;
	(void) block_len;

	return 1;
}
| 444 | |
#define UNFOUND ((uint32_t)-1)

/*
	Search block (block_len bytes) for the first occurrence of str
	(str_len bytes).

	Returns the offset of the match within block, or UNFOUND if
	there is none. An empty block never matches, and neither does a
	string longer than the block; the latter is checked here rather
	than asserted, so release builds cannot read past the block.
*/
static uint32_t
find_string_in_block(const uint8_t *str, size_t str_len, const uint8_t *block, size_t block_len)
{
	size_t pos, last;

	if (block_len == 0 || str_len > block_len)
		return UNFOUND;

	/* Highest offset at which str still fits inside block. */
	last = block_len - str_len;

	for (pos = 0; pos <= last; pos++)
	{
		if (!memcmp(str, block + pos, str_len))
			return (uint32_t)pos;
	}

	return UNFOUND;
}
| 468 | |
| 469 static void | |
| 470 subset_name_table(fz_context *ctx, ttf_t *ttf, fz_stream *stm) | |
| 471 { | |
| 472 fz_buffer *t = read_table(ctx, stm, TAG("name"), 0); | |
| 473 uint8_t *d; | |
| 474 uint32_t i, n, off; | |
| 475 ptr_list_t pl = { 0 }; | |
| 476 size_t name_data_size; | |
| 477 uint8_t *new_name_data = NULL; | |
| 478 size_t new_len; | |
| 479 | |
| 480 if (t == NULL) | |
| 481 return; /* No name table */ | |
| 482 | |
| 483 d = t->data; | |
| 484 | |
| 485 fz_var(new_name_data); | |
| 486 | |
| 487 fz_try(ctx) | |
| 488 { | |
| 489 if (get16(d) != 0 || t->len < 6) | |
| 490 fz_throw(ctx, FZ_ERROR_FORMAT, "Unsupported name table format"); | |
| 491 | |
| 492 n = get16(d+2); | |
| 493 off = get16(d+4); | |
| 494 name_data_size = t->len - 6 - 12*n; | |
| 495 | |
| 496 if (t->len < 6 + 12*n) | |
| 497 fz_throw(ctx, FZ_ERROR_FORMAT, "Truncated name table"); | |
| 498 | |
| 499 ptr_list_compact(ctx, &pl, filter_name_tables, d+6, n, 12, d, t->len); | |
| 500 | |
| 501 /* Sort our list so that the ones with the largest name data blocks come first. */ | |
| 502 ptr_list_sort(ctx, &pl, names_by_size); | |
| 503 | |
| 504 new_name_data = fz_malloc(ctx, name_data_size); | |
| 505 new_len = 0; | |
| 506 for (i = 0; i < pl.len; i++) | |
| 507 { | |
| 508 uint32_t name_len, offset, name_off; | |
| 509 uint8_t *name; | |
| 510 | |
| 511 if (t->len < (size_t) (pl.ptr[i] - t->data) + 8 + 2) | |
| 512 fz_throw(ctx, FZ_ERROR_FORMAT, "Truncated name length in name table"); | |
| 513 name_len = get16(pl.ptr[i] + 8); | |
| 514 | |
| 515 if (t->len < (size_t) (pl.ptr[i] - t->data) + 10 + 2) | |
| 516 fz_throw(ctx, FZ_ERROR_FORMAT, "Truncated name offset in name table"); | |
| 517 name_off = off + get16(pl.ptr[i] + 10); | |
| 518 name = d + name_off; | |
| 519 | |
| 520 if (t->len < name_off + name_len) | |
| 521 fz_throw(ctx, FZ_ERROR_FORMAT, "Truncated name in name table"); | |
| 522 offset = find_string_in_block(name, name_len, new_name_data, new_len); | |
| 523 if (offset == UNFOUND) | |
| 524 { | |
| 525 if (name_data_size < new_len + name_len) | |
| 526 fz_throw(ctx, FZ_ERROR_FORMAT, "Bad name table in TTF"); | |
| 527 memcpy(new_name_data + new_len, name, name_len); | |
| 528 offset = (uint32_t)new_len; | |
| 529 new_len += name_len; | |
| 530 } | |
| 531 put16(pl.ptr[i]+10, offset); | |
| 532 } | |
| 533 memcpy(d + 6 + 12*pl.len, new_name_data, new_len); | |
| 534 t->len = 6 + 12*pl.len + new_len; | |
| 535 put16(d+4, 6 + 12*pl.len); | |
| 536 } | |
| 537 fz_always(ctx) | |
| 538 { | |
| 539 drop_ptr_list(ctx, &pl); | |
| 540 fz_free(ctx, new_name_data); | |
| 541 } | |
| 542 fz_catch(ctx) | |
| 543 { | |
| 544 fz_drop_buffer(ctx, t); | |
| 545 fz_rethrow(ctx); | |
| 546 } | |
| 547 | |
| 548 add_table(ctx, ttf, TAG("name"), t); | |
| 549 } | |
| 550 | |
| 551 static encoding_t * | |
| 552 load_enc_tab0(fz_context *ctx, uint8_t *d, size_t data_size, uint32_t offset) | |
| 553 { | |
| 554 encoding_t *enc; | |
| 555 int i; | |
| 556 | |
| 557 if (data_size < 262) | |
| 558 fz_throw(ctx, FZ_ERROR_FORMAT, "Truncated cmap 0 format table"); | |
| 559 | |
| 560 enc = fz_malloc_struct(ctx, encoding_t); | |
| 561 d += offset + 6; | |
| 562 | |
| 563 enc->max = 256; | |
| 564 for (i = 0; i < 256; i++) | |
| 565 enc->gid[i] = d[i]; | |
| 566 | |
| 567 return enc; | |
| 568 } | |
| 569 | |
| 570 static encoding_t * | |
| 571 load_enc_tab4(fz_context *ctx, uint8_t *d, size_t data_size, uint32_t offset) | |
| 572 { | |
| 573 encoding_t *enc; | |
| 574 uint16_t seg_count; | |
| 575 uint32_t i; | |
| 576 | |
| 577 if (data_size < offset + 26) | |
| 578 fz_throw(ctx, FZ_ERROR_FORMAT, "cmap4 too small"); | |
| 579 | |
| 580 seg_count = get16(d+offset+6); /* 2 * seg_count */ | |
| 581 | |
| 582 if (seg_count & 1) | |
| 583 fz_throw(ctx, FZ_ERROR_FORMAT, "Malformed cmap4 table"); | |
| 584 seg_count >>= 1; | |
| 585 | |
| 586 enc = fz_calloc(ctx, 1, sizeof(encoding_t) + sizeof(uint16_t) * (65536 - 256)); | |
| 587 enc->max = 65536; | |
| 588 | |
| 589 fz_try(ctx) | |
| 590 { | |
| 591 /* Run through the segments, counting how many are used. */ | |
| 592 for (i = 0; i < seg_count; i++) | |
| 593 { | |
| 594 uint16_t seg_end, seg_start, delta, target, inner_offset; | |
| 595 uint32_t offset_ptr, s; | |
| 596 | |
| 597 if (data_size < offset + 14 + 6 * seg_count + 2 + 2 * i + 2) | |
| 598 fz_throw(ctx, FZ_ERROR_FORMAT, "cmap4 too small"); | |
| 599 | |
| 600 seg_end = get16(d + offset + 14 + 2 * i); | |
| 601 seg_start = get16(d + offset + 14 + 2 * seg_count + 2 + 2 * i); | |
| 602 delta = get16(d + offset + 14 + 4 * seg_count + 2 + 2 * i); | |
| 603 offset_ptr = offset + 14 + 6 * seg_count + 2 + 2 * i; | |
| 604 inner_offset = get16(d + offset_ptr); | |
| 605 | |
| 606 if (seg_start >= enc->max || seg_end >= enc->max || seg_end < seg_start) | |
| 607 fz_throw(ctx, FZ_ERROR_FORMAT, "Malformed cmap4 table."); | |
| 608 | |
| 609 for (s = seg_start; s <= seg_end; s++) | |
| 610 { | |
| 611 if (inner_offset == 0) | |
| 612 { | |
| 613 target = delta + s; | |
| 614 } | |
| 615 else | |
| 616 { | |
| 617 if (data_size < offset_ptr + inner_offset + 2 * (s - seg_start) + 2) | |
| 618 fz_throw(ctx, FZ_ERROR_FORMAT, "cmap4 too small"); | |
| 619 | |
| 620 /* Yes. This is very screwy. The inner_offset is from the offset_ptr in use. */ | |
| 621 target = get16(d + offset_ptr + inner_offset + 2 * (s - seg_start)); | |
| 622 if (target != 0) | |
| 623 target += delta; | |
| 624 } | |
| 625 | |
| 626 if (target != 0) | |
| 627 enc->gid[s] = target; | |
| 628 } | |
| 629 } | |
| 630 } | |
| 631 fz_catch(ctx) | |
| 632 { | |
| 633 fz_free(ctx, enc); | |
| 634 fz_rethrow(ctx); | |
| 635 } | |
| 636 | |
| 637 return enc; | |
| 638 } | |
| 639 | |
| 640 static encoding_t * | |
| 641 load_enc_tab6(fz_context *ctx, uint8_t *d, size_t data_size, uint32_t offset) | |
| 642 { | |
| 643 encoding_t *enc; | |
| 644 uint16_t first_code; | |
| 645 uint16_t entry_count; | |
| 646 uint16_t length; | |
| 647 uint32_t i; | |
| 648 | |
| 649 if (data_size < 10) | |
| 650 fz_throw(ctx, FZ_ERROR_FORMAT, "cmap6 too small"); | |
| 651 | |
| 652 length = get16(d+offset+2); | |
| 653 first_code = get16(d+offset+6); | |
| 654 entry_count = get16(d+offset+8); | |
| 655 | |
| 656 if (length < entry_count*2 + 10) | |
| 657 fz_throw(ctx, FZ_ERROR_FORMAT, "Malformed cmap6 table"); | |
| 658 | |
| 659 enc = fz_calloc(ctx, 1, sizeof(encoding_t) + sizeof(uint16_t) * (first_code + entry_count - 256)); | |
| 660 enc->max = first_code + entry_count; | |
| 661 | |
| 662 /* Run through the segments, counting how many are used. */ | |
| 663 for (i = 0; i < entry_count; i++) | |
| 664 { | |
| 665 enc->gid[first_code+i] = get16(d+offset+10+i*2); | |
| 666 } | |
| 667 | |
| 668 return enc; | |
| 669 } | |
| 670 | |
| 671 static int | |
| 672 is_encoding_all_zeros(fz_context *ctx, encoding_t *enc) | |
| 673 { | |
| 674 uint32_t i; | |
| 675 | |
| 676 if (enc != NULL) | |
| 677 for (i = 0; i < enc->max; i++) | |
| 678 if (enc->gid[i] != 0) | |
| 679 return 0; | |
| 680 | |
| 681 return 1; | |
| 682 } | |
| 683 | |
| 684 | |
| 685 static encoding_t * | |
| 686 load_enc(fz_context *ctx, fz_buffer *t, int pid, int psid) | |
| 687 { | |
| 688 uint8_t *d = t->data; | |
| 689 size_t data_size = t->len; | |
| 690 uint32_t i, n; | |
| 691 | |
| 692 if (data_size < 6 || get16(d) != 0) | |
| 693 fz_throw(ctx, FZ_ERROR_FORMAT, "Unsupported cmap table format"); | |
| 694 | |
| 695 n = get16(d+2); | |
| 696 | |
| 697 if (data_size < 4 + 8*n) | |
| 698 fz_throw(ctx, FZ_ERROR_FORMAT, "Truncated cmap table"); | |
| 699 | |
| 700 for (i = 0; i < n; i++) | |
| 701 { | |
| 702 uint16_t plat_id = get16(d + 4 + i * 8); | |
| 703 uint16_t plat_spec_id = get16(d + 4 + i * 8 + 2); | |
| 704 uint32_t offset = get32(d + 4 + i * 8 + 4); | |
| 705 uint16_t fmt; | |
| 706 encoding_t *enc; | |
| 707 | |
| 708 if (plat_id != pid || plat_spec_id != psid) | |
| 709 continue; | |
| 710 | |
| 711 if (offset < 4 + 8 * n || offset + 2 >= data_size) | |
| 712 fz_throw(ctx, FZ_ERROR_FORMAT, "cmap table data out of range"); | |
| 713 | |
| 714 fmt = get16(d+offset); | |
| 715 switch(fmt) | |
| 716 { | |
| 717 case 0: | |
| 718 enc = load_enc_tab0(ctx, d, data_size, offset); | |
| 719 break; | |
| 720 case 4: | |
| 721 enc = load_enc_tab4(ctx, d, data_size, offset); | |
| 722 break; | |
| 723 case 6: | |
| 724 enc = load_enc_tab6(ctx, d, data_size, offset); | |
| 725 break; | |
| 726 default: | |
| 727 fz_throw(ctx, FZ_ERROR_FORMAT, "Unsupported cmap table format %d", fmt); | |
| 728 } | |
| 729 | |
| 730 enc->pid = pid; | |
| 731 enc->psid = psid; | |
| 732 | |
| 733 if (is_encoding_all_zeros(ctx, enc)) | |
| 734 { | |
| 735 // ignore any encoding that is all zeros | |
| 736 fz_free(ctx, enc); | |
| 737 enc = NULL; | |
| 738 } | |
| 739 | |
| 740 return enc; | |
| 741 } | |
| 742 | |
| 743 return NULL; | |
| 744 } | |
| 745 | |
| 746 static void | |
| 747 load_encoding(fz_context *ctx, ttf_t *ttf, fz_stream *stm) | |
| 748 { | |
| 749 fz_buffer *t = read_table(ctx, stm, TAG("cmap"), 1); | |
| 750 encoding_t *enc = NULL; | |
| 751 | |
| 752 fz_var(enc); | |
| 753 | |
| 754 fz_try(ctx) | |
| 755 { | |
| 756 if (ttf->symbolic) | |
| 757 { | |
| 758 /* For symbolic fonts, we look for (3,0) as per PDF Spec, then (1,0). */ | |
| 759 enc = load_enc(ctx, t, 3, 0); | |
| 760 if (!enc) | |
| 761 enc = load_enc(ctx, t, 1, 0); | |
| 762 } | |
| 763 else | |
| 764 { | |
| 765 /* For non symbolic fonts, we look for (3,1) then (1,0), then (0,1), and finally (0,3). */ | |
| 766 enc = load_enc(ctx, t, 3, 1); | |
| 767 if (!enc) | |
| 768 enc = load_enc(ctx, t, 1, 0); | |
| 769 if (!enc) | |
| 770 enc = load_enc(ctx, t, 0, 1); | |
| 771 if (!enc) | |
| 772 enc = load_enc(ctx, t, 0, 3); | |
| 773 } | |
| 774 if (!enc) | |
| 775 fz_throw(ctx, FZ_ERROR_FORMAT, "No suitable cmap table found"); | |
| 776 } | |
| 777 fz_always(ctx) | |
| 778 { | |
| 779 fz_drop_buffer(ctx, t); | |
| 780 } | |
| 781 fz_catch(ctx) | |
| 782 { | |
| 783 fz_rethrow(ctx); | |
| 784 } | |
| 785 | |
| 786 ttf->encoding = enc; | |
| 787 } | |
| 788 | |
| 789 static void | |
| 790 reduce_encoding(fz_context *ctx, ttf_t *ttf, int *gids, int num_gids) | |
| 791 { | |
| 792 int i; | |
| 793 encoding_t *enc = ttf->encoding; | |
| 794 int n = enc->max; | |
| 795 | |
| 796 for (i = 0; i < n; i++) | |
| 797 { | |
| 798 int gid = enc->gid[i]; | |
| 799 int lo, hi; | |
| 800 | |
| 801 if (gid == 0) | |
| 802 continue; | |
| 803 | |
| 804 lo = 0; | |
| 805 hi = num_gids; | |
| 806 while (lo < hi) | |
| 807 { | |
| 808 int mid = (lo + hi)>>1; | |
| 809 int g = gids[mid]; | |
| 810 if (g < gid) | |
| 811 lo = mid+1; | |
| 812 else if (g > gid) | |
| 813 hi = mid; | |
| 814 else | |
| 815 goto found; /* Leave this one as is. */ | |
| 816 } | |
| 817 | |
| 818 /* Not found */ | |
| 819 enc->gid[i] = 0; | |
| 820 found: | |
| 821 {} | |
| 822 } | |
| 823 } | |
| 824 | |
| 825 static void | |
| 826 make_cmap(fz_context *ctx, ttf_t *ttf) | |
| 827 { | |
| 828 uint32_t i; | |
| 829 uint32_t len; | |
| 830 uint32_t segs = 0; | |
| 831 uint32_t seg, seg_start, seg_end; | |
| 832 encoding_t *enc = ttf->encoding; | |
| 833 uint32_t n = enc->max; | |
| 834 uint32_t entries = 0; | |
| 835 fz_buffer *buf; | |
| 836 uint8_t *d; | |
| 837 uint32_t offset; | |
| 838 | |
| 839 /* Make a type 4 table. */ | |
| 840 | |
| 841 /* Count the number of segments. */ | |
| 842 for (i = 0; i < n; i++) | |
| 843 { | |
| 844 if (enc->gid[i] == 0) | |
| 845 continue; | |
| 846 | |
| 847 seg_start = i; | |
| 848 seg_end = i; | |
| 849 for (i++; i<n; i++) | |
| 850 { | |
| 851 if (enc->gid[i] != 0) | |
| 852 seg_end = i; | |
| 853 else if (i - seg_end > 4) | |
| 854 break; | |
| 855 } | |
| 856 entries += seg_end - seg_start + 1; | |
| 857 segs++; | |
| 858 } | |
| 859 segs++; /* For the terminator */ | |
| 860 | |
| 861 len = 12 + 14 + 2 + segs * 2 * 4 + entries * 2; | |
| 862 buf = fz_new_buffer(ctx, len); | |
| 863 d = buf->data; | |
| 864 | |
| 865 /* cmap header */ | |
| 866 put16(d, 0); /* version */ | |
| 867 put16(d+2, 1); /* num sub tables */ | |
| 868 put16(d+4, enc->pid); | |
| 869 put16(d+6, enc->psid); | |
| 870 put32(d+8, 12); /* offset */ | |
| 871 d += 12; | |
| 872 | |
| 873 put16(d, 4); /* Format */ | |
| 874 put16(d + 2, len-12); /* Length */ | |
| 875 put16(d + 4, 0); /* FIXME: Language */ | |
| 876 put16(d + 6, segs * 2); | |
| 877 i = 0; | |
| 878 while (1U<<(i+1) <= segs) | |
| 879 i++; | |
| 880 /* So 1<<i <= segs < 1<<(i+1) */ | |
| 881 put16(d + 8, 1<<(i+1)); /* searchRange */ | |
| 882 put16(d + 10, i); /* entrySelector */ | |
| 883 put16(d + 12, 2 * segs - (1<<(i+1))); /* rangeShift */ | |
| 884 put16(d + 14 + segs * 2, 0); /* reserved */ | |
| 885 | |
| 886 /* Now output the segment data */ | |
| 887 entries = 14 + segs * 2 * 4 + 2; /* offset of where to put entries.*/ | |
| 888 seg = 0; | |
| 889 for (i = 0; i < n; i++) | |
| 890 { | |
| 891 if (enc->gid[i] == 0) | |
| 892 continue; | |
| 893 | |
| 894 seg_start = i; | |
| 895 seg_end = i; | |
| 896 offset = 14 + segs * 2 * 3 + 2 + seg * 2; | |
| 897 put16(d + offset - segs * 2, 0); /* Delta - always 0 for now. */ | |
| 898 put16(d + offset, entries - offset); /* offset */ | |
| 899 | |
| 900 /* Insert an entry */ | |
| 901 if (!ttf->is_otf && ttf->gid_renum && i < enc->max && enc->gid[i] < ttf->orig_num_glyphs) | |
| 902 put16(d + entries, (ttf->is_otf || ttf->gid_renum == NULL) ? enc->gid[i] : ttf->gid_renum[enc->gid[i]]); | |
| 903 else | |
| 904 put16(d + entries, enc->gid[i]); | |
| 905 | |
| 906 entries += 2; | |
| 907 for (i++; i < n; i++) | |
| 908 { | |
| 909 if (enc->gid[i] != 0) | |
| 910 { | |
| 911 /* Include i in the range, which means we need to add entries for | |
| 912 * seg_end to i inclusive. */ | |
| 913 while (seg_end < i) | |
| 914 { | |
| 915 seg_end++; | |
| 916 if (!ttf->is_otf && ttf->gid_renum && seg_end < enc->max && enc->gid[seg_end] < ttf->orig_num_glyphs) | |
| 917 put16(d + entries, ttf->gid_renum[enc->gid[seg_end]]); | |
| 918 else | |
| 919 put16(d + entries, enc->gid[seg_end]); | |
| 920 entries += 2; | |
| 921 } | |
| 922 } | |
| 923 else if (i - seg_end > 4) | |
| 924 break; | |
| 925 } | |
| 926 put16(d + 14 + segs * 2 + seg * 2 + 2, seg_start); | |
| 927 put16(d + 14 + seg * 2, seg_end); | |
| 928 seg++; | |
| 929 } | |
| 930 offset = 14 + segs * 2 * 3 + 2 + seg * 2; | |
| 931 put16(d + 14 + segs * 2 + seg * 2 + 2, 0xffff); | |
| 932 put16(d + 14 + seg * 2, 0xffff); | |
| 933 put16(d + offset - segs * 2, 1); /* Delta */ | |
| 934 put16(d + offset, 0); /* offset */ | |
| 935 buf->len = entries + 12; | |
| 936 assert(buf->len == buf->cap); | |
| 937 | |
| 938 add_table(ctx, ttf, TAG("cmap"), buf); | |
| 939 } | |
| 940 | |
| 941 static void | |
| 942 read_maxp(fz_context *ctx, ttf_t *ttf, fz_stream *stm) | |
| 943 { | |
| 944 fz_buffer *t = read_table(ctx, stm, TAG("maxp"), 1); | |
| 945 | |
| 946 if (t->len < 6) | |
| 947 { | |
| 948 fz_drop_buffer(ctx, t); | |
| 949 fz_throw(ctx, FZ_ERROR_FORMAT, "truncated maxp table"); | |
| 950 } | |
| 951 | |
| 952 ttf->orig_num_glyphs = get16(t->data+4); | |
| 953 | |
| 954 add_table(ctx, ttf, TAG("maxp"), t); | |
| 955 ttf->maxp = t->data; | |
| 956 } | |
| 957 | |
| 958 static void | |
| 959 read_head(fz_context *ctx, ttf_t *ttf, fz_stream *stm) | |
| 960 { | |
| 961 uint32_t version; | |
| 962 fz_buffer *t = read_table(ctx, stm, TAG("head"), 1); | |
| 963 | |
| 964 if (t->len < 54) | |
| 965 { | |
| 966 fz_drop_buffer(ctx, t); | |
| 967 fz_throw(ctx, FZ_ERROR_FORMAT, "truncated head table"); | |
| 968 } | |
| 969 | |
| 970 version = get32(t->data); | |
| 971 if (version != 0x00010000) | |
| 972 { | |
| 973 fz_drop_buffer(ctx, t); | |
| 974 fz_throw(ctx, FZ_ERROR_FORMAT, "Unsupported head table version 0x%08x", version); | |
| 975 } | |
| 976 | |
| 977 ttf->index_to_loc_formatp = t->data+50; | |
| 978 ttf->index_to_loc_format = get16(ttf->index_to_loc_formatp); | |
| 979 if (ttf->index_to_loc_format & ~1) | |
| 980 { | |
| 981 fz_drop_buffer(ctx, t); | |
| 982 fz_throw(ctx, FZ_ERROR_FORMAT, "Unsupported index_to_loc_format 0x%04x", ttf->index_to_loc_format); | |
| 983 } | |
| 984 | |
| 985 add_table(ctx, ttf, TAG("head"), t); | |
| 986 } | |
| 987 | |
| 988 static void | |
| 989 read_loca(fz_context *ctx, ttf_t *ttf, fz_stream *stm) | |
| 990 { | |
| 991 fz_buffer *t; | |
| 992 uint32_t len = (2<<ttf->index_to_loc_format) * (ttf->orig_num_glyphs+1); | |
| 993 | |
| 994 t = read_table(ctx, stm, TAG("loca"), 1); | |
| 995 | |
| 996 if (t->len < len) | |
| 997 { | |
| 998 fz_drop_buffer(ctx, t); | |
| 999 fz_throw(ctx, FZ_ERROR_FORMAT, "truncated loca table"); | |
| 1000 } | |
| 1001 | |
| 1002 ttf->loca = t->data; | |
| 1003 ttf->loca_len = &t->len; | |
| 1004 | |
| 1005 add_table(ctx, ttf, TAG("loca"), t); | |
| 1006 } | |
| 1007 | |
| 1008 static void | |
| 1009 read_hhea(fz_context *ctx, ttf_t *ttf, fz_stream *stm) | |
| 1010 { | |
| 1011 uint32_t version; | |
| 1012 fz_buffer *t = read_table(ctx, stm, TAG("hhea"), 1); | |
| 1013 uint16_t i; | |
| 1014 | |
| 1015 if (t->len < 36) | |
| 1016 { | |
| 1017 fz_drop_buffer(ctx, t); | |
| 1018 fz_throw(ctx, FZ_ERROR_FORMAT, "truncated hhea table"); | |
| 1019 } | |
| 1020 | |
| 1021 version = get32(t->data); | |
| 1022 if (version != 0x00010000) | |
| 1023 { | |
| 1024 fz_drop_buffer(ctx, t); | |
| 1025 fz_throw(ctx, FZ_ERROR_FORMAT, "Unsupported hhea table version 0x%08x", version); | |
| 1026 } | |
| 1027 | |
| 1028 ttf->orig_num_long_hor_metrics = get16(t->data+34); | |
| 1029 if (ttf->orig_num_long_hor_metrics > ttf->orig_num_glyphs) | |
| 1030 { | |
| 1031 fz_drop_buffer(ctx, t); | |
| 1032 fz_throw(ctx, FZ_ERROR_FORMAT, "Overlong hhea table"); | |
| 1033 } | |
| 1034 | |
| 1035 add_table(ctx, ttf, TAG("hhea"), t); | |
| 1036 | |
| 1037 /* Previously gids 0 to orig_num_long_hor_metrics-1 were described with | |
| 1038 * hor metrics, and the ones afterwards were fixed widths. Find where | |
| 1039 * that dividing line is in our new reduced set. */ | |
| 1040 if (ttf->encoding && !ttf->is_otf && ttf->orig_num_long_hor_metrics > 0) | |
| 1041 { | |
| 1042 /* i = 0 is always kept long in subset_hmtx(). */ | |
| 1043 ttf->new_num_long_hor_metrics = 1; | |
| 1044 for (i = ttf->orig_num_long_hor_metrics-1; i > 0; i--) | |
| 1045 if (ttf->gid_renum[i]) | |
| 1046 { | |
| 1047 ttf->new_num_long_hor_metrics = ttf->gid_renum[i]+1; | |
| 1048 break; | |
| 1049 } | |
| 1050 | |
| 1051 put16(t->data+34, ttf->new_num_long_hor_metrics); | |
| 1052 } | |
| 1053 else | |
| 1054 { | |
| 1055 ttf->new_num_long_hor_metrics = ttf->orig_num_long_hor_metrics; | |
| 1056 } | |
| 1057 } | |
| 1058 | |
| 1059 static uint32_t | |
| 1060 get_loca(fz_context *ctx, ttf_t *ttf, uint32_t n) | |
| 1061 { | |
| 1062 if (ttf->index_to_loc_format == 0) | |
| 1063 { | |
| 1064 /* Short index - convert from words to bytes */ | |
| 1065 return get16(ttf->loca + n*2) * 2; | |
| 1066 } | |
| 1067 else | |
| 1068 { | |
| 1069 /* Long index - in bytes already */ | |
| 1070 return get32(ttf->loca + n*4); | |
| 1071 } | |
| 1072 } | |
| 1073 | |
| 1074 static void | |
| 1075 put_loca(fz_context *ctx, ttf_t *ttf, uint32_t n, uint32_t off) | |
| 1076 { | |
| 1077 if (ttf->index_to_loc_format == 0) | |
| 1078 { | |
| 1079 /* Short index - convert from bytes to words */ | |
| 1080 assert((off & 1) == 0); | |
| 1081 put16(ttf->loca + n*2, off/2); | |
| 1082 } | |
| 1083 else | |
| 1084 { | |
| 1085 /* Long index - in bytes already */ | |
| 1086 put32(ttf->loca + n*4, off); | |
| 1087 } | |
| 1088 } | |
| 1089 | |
| 1090 static void | |
| 1091 glyph_used(fz_context *ctx, ttf_t *ttf, fz_buffer *glyf, uint16_t i) | |
| 1092 { | |
| 1093 uint32_t offset, len; | |
| 1094 const uint8_t *data; | |
| 1095 uint16_t flags; | |
| 1096 | |
| 1097 if (i >= ttf->orig_num_glyphs) | |
| 1098 { | |
| 1099 fz_warn(ctx, "TTF subsetting; gid >= num_gids!"); | |
| 1100 return; | |
| 1101 } | |
| 1102 | |
| 1103 if (ttf->gid_renum[i] != 0) | |
| 1104 return; | |
| 1105 | |
| 1106 ttf->gid_renum[i] = 1; | |
| 1107 | |
| 1108 /* If this glyf is composite, then we need to add any dependencies of it. */ | |
| 1109 offset = get_loca(ctx, ttf, i); | |
| 1110 len = get_loca(ctx, ttf, i+1) - offset; | |
| 1111 if (len == 0) | |
| 1112 return; | |
| 1113 if (offset+2 > glyf->len) | |
| 1114 fz_throw(ctx, FZ_ERROR_FORMAT, "Corrupt glyf data"); | |
| 1115 data = glyf->data + offset; | |
| 1116 if ((int16_t)get16(data) >= 0) | |
| 1117 return; /* Single glyph - no dependencies */ | |
| 1118 data += 4 * 2 + 2; | |
| 1119 if (len < 4*2 + 2) | |
| 1120 fz_throw(ctx, FZ_ERROR_FORMAT, "Corrupt glyf data"); | |
| 1121 len -= 4 * 2 + 2; | |
| 1122 do | |
| 1123 { | |
| 1124 uint16_t idx, skip; | |
| 1125 | |
| 1126 if (len < 4) | |
| 1127 fz_throw(ctx, FZ_ERROR_FORMAT, "Corrupt glyf data"); | |
| 1128 | |
| 1129 flags = get16(data); | |
| 1130 idx = get16(data+2); | |
| 1131 | |
| 1132 glyph_used(ctx, ttf, glyf, idx); | |
| 1133 | |
| 1134 #define ARGS_1_AND_2_ARE_WORDS 1 | |
| 1135 #define ARGS_ARE_XY_VALUES 2 | |
| 1136 #define WE_HAVE_A_SCALE 8 | |
| 1137 #define MORE_COMPONENTS 32 | |
| 1138 #define WE_HAVE_AN_X_AND_Y_SCALE 64 | |
| 1139 #define WE_HAVE_A_TWO_BY_TWO 128 | |
| 1140 | |
| 1141 /* Skip the X and Y offsets */ | |
| 1142 if (flags & ARGS_1_AND_2_ARE_WORDS) | |
| 1143 skip = 4 + 4; | |
| 1144 else | |
| 1145 skip = 4 + 2; | |
| 1146 | |
| 1147 /* Skip the transformation */ | |
| 1148 switch (flags & (WE_HAVE_A_SCALE + WE_HAVE_AN_X_AND_Y_SCALE + WE_HAVE_A_TWO_BY_TWO)) | |
| 1149 { | |
| 1150 case 0: | |
| 1151 /* No extra to skip */ | |
| 1152 break; | |
| 1153 case WE_HAVE_A_SCALE: | |
| 1154 skip += 2; | |
| 1155 break; | |
| 1156 case WE_HAVE_AN_X_AND_Y_SCALE: | |
| 1157 skip += 4; | |
| 1158 break; | |
| 1159 case WE_HAVE_A_TWO_BY_TWO: | |
| 1160 skip += 8; | |
| 1161 break; | |
| 1162 } | |
| 1163 if (len < skip) | |
| 1164 fz_throw(ctx, FZ_ERROR_FORMAT, "Corrupt glyf data"); | |
| 1165 data += skip; | |
| 1166 len -= skip; | |
| 1167 } | |
| 1168 while(flags & MORE_COMPONENTS); | |
| 1169 } | |
| 1170 | |
| 1171 static void | |
| 1172 renumber_composite(fz_context *ctx, ttf_t *ttf, uint8_t *data, uint32_t len) | |
| 1173 { | |
| 1174 uint16_t flags; | |
| 1175 uint16_t x; | |
| 1176 | |
| 1177 data += 4 * 2 + 2; | |
| 1178 if (len < 4*2 + 2) | |
| 1179 fz_throw(ctx, FZ_ERROR_FORMAT, "Corrupt glyf data"); | |
| 1180 len -= 4 * 2 + 2; | |
| 1181 do | |
| 1182 { | |
| 1183 uint16_t skip; | |
| 1184 | |
| 1185 if (len < 4) | |
| 1186 fz_throw(ctx, FZ_ERROR_FORMAT, "Corrupt glyf data"); | |
| 1187 | |
| 1188 flags = get16(data); | |
| 1189 x = get16(data+2); | |
| 1190 if (x >= ttf->orig_num_glyphs) | |
| 1191 fz_throw(ctx, FZ_ERROR_FORMAT, "Corrupt glyf data"); | |
| 1192 put16(data+2, ttf->gid_renum[x]); | |
| 1193 | |
| 1194 /* Skip the X and Y offsets */ | |
| 1195 if (flags & ARGS_1_AND_2_ARE_WORDS) | |
| 1196 skip = 4 + 4; | |
| 1197 else | |
| 1198 skip = 4 + 2; | |
| 1199 | |
| 1200 /* Skip the transformation */ | |
| 1201 switch (flags & (WE_HAVE_A_SCALE + WE_HAVE_AN_X_AND_Y_SCALE + WE_HAVE_A_TWO_BY_TWO)) | |
| 1202 { | |
| 1203 case 0: | |
| 1204 /* No extra to skip */ | |
| 1205 break; | |
| 1206 case WE_HAVE_A_SCALE: | |
| 1207 skip += 2; | |
| 1208 break; | |
| 1209 case WE_HAVE_AN_X_AND_Y_SCALE: | |
| 1210 skip += 4; | |
| 1211 break; | |
| 1212 case WE_HAVE_A_TWO_BY_TWO: | |
| 1213 skip += 8; | |
| 1214 break; | |
| 1215 } | |
| 1216 if (len < skip) | |
| 1217 fz_throw(ctx, FZ_ERROR_FORMAT, "Corrupt glyf data"); | |
| 1218 data += skip; | |
| 1219 len -= skip; | |
| 1220 } | |
| 1221 while(flags & MORE_COMPONENTS); | |
| 1222 } | |
| 1223 | |
| 1224 static void | |
| 1225 read_glyf(fz_context *ctx, ttf_t *ttf, fz_stream *stm, int *gids, int num_gids) | |
| 1226 { | |
| 1227 uint32_t len = get_loca(ctx, ttf, ttf->orig_num_glyphs); | |
| 1228 fz_buffer *t = read_table(ctx, stm, TAG("glyf"), 1); | |
| 1229 encoding_t *enc = ttf->encoding; | |
| 1230 uint32_t last_loca, i, j, k; | |
| 1231 uint32_t new_start, old_start, old_end, last_loca_ofs; | |
| 1232 | |
| 1233 if (t->len < len) | |
| 1234 { | |
| 1235 fz_drop_buffer(ctx, t); | |
| 1236 fz_throw(ctx, FZ_ERROR_FORMAT, "truncated glyf table"); | |
| 1237 } | |
| 1238 | |
| 1239 add_table(ctx, ttf, TAG("glyf"), t); | |
| 1240 | |
| 1241 /* Now, make the renumber list for the glyphs. */ | |
| 1242 ttf->gid_renum = fz_calloc(ctx, ttf->orig_num_glyphs, sizeof(uint16_t)); | |
| 1243 | |
| 1244 /* Initially, we'll use it just as a usage list. 0 = unused, 1 used */ | |
| 1245 | |
| 1246 /* glyph 0 is always used. */ | |
| 1247 glyph_used(ctx, ttf, t, 0); | |
| 1248 | |
| 1249 if (enc) | |
| 1250 { | |
| 1251 uint32_t n = enc->max; | |
| 1252 /* If we have an encoding table, run through it, and keep anything needed from there. */ | |
| 1253 for (i = 0; i < n; i++) | |
| 1254 if (enc->gid[i]) | |
| 1255 glyph_used(ctx, ttf, t, enc->gid[i]); | |
| 1256 | |
| 1257 /* Now convert from a usage table to a renumbering table. */ | |
| 1258 if (ttf->orig_num_glyphs > 0) | |
| 1259 { | |
| 1260 ttf->gid_renum[0] = 0; | |
| 1261 j = 1; | |
| 1262 for (i = 1; i < ttf->orig_num_glyphs; i++) | |
| 1263 if (ttf->gid_renum[i]) | |
| 1264 ttf->gid_renum[i] = j++; | |
| 1265 ttf->new_num_glyphs = j; | |
| 1266 } | |
| 1267 else | |
| 1268 { | |
| 1269 ttf->new_num_glyphs = 0; | |
| 1270 } | |
| 1271 } | |
| 1272 else | |
| 1273 { | |
| 1274 /* We're a cid font. The cids are gids. */ | |
| 1275 for (i = 0; i < (uint32_t)num_gids; i++) | |
| 1276 glyph_used(ctx, ttf, t, gids[i]); | |
| 1277 ttf->new_num_glyphs = ttf->orig_num_glyphs; | |
| 1278 } | |
| 1279 | |
| 1280 /* Now subset the glyf table. */ | |
| 1281 if (enc) | |
| 1282 { | |
| 1283 old_start = get_loca(ctx, ttf, 0); | |
| 1284 if (old_start > t->len) | |
| 1285 fz_throw(ctx, FZ_ERROR_FORMAT, "Bad loca value"); | |
| 1286 old_end = get_loca(ctx, ttf, 1); | |
| 1287 if (old_end > t->len || old_end < old_start) | |
| 1288 fz_throw(ctx, FZ_ERROR_FORMAT, "Bad loca value"); | |
| 1289 len = old_end - old_start; | |
| 1290 new_start = 0; | |
| 1291 put_loca(ctx, ttf, 0, new_start); | |
| 1292 last_loca = 0; | |
| 1293 last_loca_ofs = len; | |
| 1294 for (i = 0; i < ttf->orig_num_glyphs; i++) | |
| 1295 { | |
| 1296 old_end = get_loca(ctx, ttf, i + 1); | |
| 1297 if (old_end > t->len || old_end < old_start) | |
| 1298 fz_throw(ctx, FZ_ERROR_FORMAT, "Bad loca value"); | |
| 1299 len = old_end - old_start; | |
| 1300 if (len > 0 && (i == 0 || ttf->gid_renum[i] != 0)) | |
| 1301 { | |
| 1302 memmove(t->data + new_start, t->data + old_start, len); | |
| 1303 if ((int16_t)get16(t->data + new_start) < 0) | |
| 1304 renumber_composite(ctx, ttf, t->data + new_start, len); | |
| 1305 for (k = last_loca + 1; k <= ttf->gid_renum[i]; k++) | |
| 1306 put_loca(ctx, ttf, k, last_loca_ofs); | |
| 1307 new_start += len; | |
| 1308 last_loca = ttf->gid_renum[i]; | |
| 1309 last_loca_ofs = new_start; | |
| 1310 } | |
| 1311 old_start = old_end; | |
| 1312 } | |
| 1313 for (k = last_loca + 1; k <= ttf->new_num_glyphs; k++) | |
| 1314 put_loca(ctx, ttf, k, last_loca_ofs); | |
| 1315 } | |
| 1316 else | |
| 1317 { | |
| 1318 new_start = 0; | |
| 1319 old_start = get_loca(ctx, ttf, 0); | |
| 1320 if (old_start > t->len) | |
| 1321 fz_throw(ctx, FZ_ERROR_FORMAT, "Bad loca value"); | |
| 1322 for (i = 0; i < ttf->orig_num_glyphs; i++) | |
| 1323 { | |
| 1324 old_end = get_loca(ctx, ttf, i + 1); | |
| 1325 if (old_end > t->len || old_end < old_start) | |
| 1326 fz_throw(ctx, FZ_ERROR_FORMAT, "Bad loca value"); | |
| 1327 len = old_end - old_start; | |
| 1328 if (len > 0 && ttf->gid_renum[i] != 0) | |
| 1329 { | |
| 1330 memmove(t->data + new_start, t->data + old_start, len); | |
| 1331 put_loca(ctx, ttf, i, new_start); | |
| 1332 new_start += len; | |
| 1333 } | |
| 1334 else | |
| 1335 { | |
| 1336 put_loca(ctx, ttf, i, new_start); | |
| 1337 } | |
| 1338 old_start = old_end; | |
| 1339 } | |
| 1340 put_loca(ctx, ttf, ttf->orig_num_glyphs, new_start); | |
| 1341 } | |
| 1342 | |
| 1343 *ttf->loca_len = (size_t) (ttf->new_num_glyphs + 1) * (2<<ttf->index_to_loc_format); | |
| 1344 t->len = new_start; | |
| 1345 } | |
| 1346 | |
| 1347 static void | |
| 1348 update_num_glyphs(fz_context *ctx, ttf_t *ttf) | |
| 1349 { | |
| 1350 put16(ttf->maxp + 4, ttf->new_num_glyphs); | |
| 1351 } | |
| 1352 | |
| 1353 static void | |
| 1354 subset_hmtx(fz_context *ctx, ttf_t *ttf, fz_stream *stm) | |
| 1355 { | |
| 1356 fz_buffer *t = read_table(ctx, stm, TAG("hmtx"), 1); | |
| 1357 uint16_t long_metrics, short_metrics, i, k; | |
| 1358 uint8_t *s = t->data; | |
| 1359 uint8_t *d = t->data; | |
| 1360 int cidfont = (ttf->encoding == NULL); | |
| 1361 | |
| 1362 long_metrics = ttf->orig_num_long_hor_metrics; | |
| 1363 if (long_metrics > ttf->orig_num_glyphs) | |
| 1364 long_metrics = ttf->orig_num_glyphs; | |
| 1365 if (long_metrics > t->len / 4) | |
| 1366 long_metrics = (uint16_t)(t->len / 4); | |
| 1367 | |
| 1368 short_metrics = (uint16_t)((t->len - long_metrics * 4) / 2); | |
| 1369 if (short_metrics > ttf->orig_num_glyphs - long_metrics) | |
| 1370 short_metrics = ttf->orig_num_glyphs - long_metrics; | |
| 1371 | |
| 1372 for (i = 0; i < long_metrics; i++) | |
| 1373 { | |
| 1374 if (i == 0 || ttf->is_otf || (i < ttf->orig_num_glyphs && ttf->gid_renum[i])) | |
| 1375 { | |
| 1376 put32(d, get32(s)); | |
| 1377 d += 4; | |
| 1378 } | |
| 1379 else if (cidfont) | |
| 1380 { | |
| 1381 put32(d, 0); | |
| 1382 d += 4; | |
| 1383 } | |
| 1384 s += 4; | |
| 1385 } | |
| 1386 for (k = 0 ; k < short_metrics; k++, i++) | |
| 1387 { | |
| 1388 if (i == 0 || ttf->is_otf || (i < ttf->orig_num_glyphs && ttf->gid_renum[i])) | |
| 1389 { | |
| 1390 put16(d, get16(s)); | |
| 1391 d += 2; | |
| 1392 } | |
| 1393 else if (cidfont) | |
| 1394 { | |
| 1395 put16(d, 0); | |
| 1396 d += 2; | |
| 1397 } | |
| 1398 s += 2; | |
| 1399 } | |
| 1400 t->len = (d - t->data); | |
| 1401 | |
| 1402 add_table(ctx, ttf, TAG("hmtx"), t); | |
| 1403 } | |
| 1404 | |
| 1405 static void | |
| 1406 shrink_loca_if_possible(fz_context *ctx, ttf_t *ttf) | |
| 1407 { | |
| 1408 uint32_t len; | |
| 1409 uint16_t i, n; | |
| 1410 uint8_t *loca; | |
| 1411 | |
| 1412 if (ttf->index_to_loc_format == 0) | |
| 1413 return; /* Can't shrink cos it's already shrunk! */ | |
| 1414 | |
| 1415 n = ttf->new_num_glyphs; | |
| 1416 len = get_loca(ctx, ttf, n); | |
| 1417 if (len >= 65536) | |
| 1418 return; /* We can't shrink it, cos it's too big. */ | |
| 1419 | |
| 1420 loca = ttf->loca; | |
| 1421 for (i = 0; i <= n; i++) | |
| 1422 { | |
| 1423 if (get32(loca + 4*i) & 1) | |
| 1424 return; /* Can't shrink it, because an offset is not even */ | |
| 1425 } | |
| 1426 | |
| 1427 for (i = 0; i <= n; i++) | |
| 1428 { | |
| 1429 put16(loca + 2*i, get32(loca + 4*i)/2); | |
| 1430 } | |
| 1431 *ttf->loca_len = 2*(n+1); | |
| 1432 put16(ttf->index_to_loc_formatp, 0); | |
| 1433 } | |
| 1434 | |
/*
	The standard Macintosh glyph names, each paired with the index it
	has in a 'post' table (0 to 257).

	This MUST be kept sorted by charname in strcmp order, as
	find_macroman_string does a binary search on it.
*/
static struct { const char *charname; int idx; } macroman[] =
{
	{ ".notdef", 0},
	{ ".null", 1},
	{ "A", 36},
	{ "AE", 144},
	{ "Aacute", 201},
	{ "Acircumflex", 199},
	{ "Adieresis", 98},
	{ "Agrave", 173},
	{ "Aring", 99},
	{ "Atilde", 174},
	{ "B", 37},
	{ "C", 38},
	{ "Cacute", 253},
	{ "Ccaron", 255},
	{ "Ccedilla", 100},
	{ "D", 39},
	{ "Delta", 168},
	{ "E", 40},
	{ "Eacute", 101},
	{ "Ecircumflex", 200},
	{ "Edieresis", 202},
	{ "Egrave", 203},
	{ "Eth", 233},
	{ "F", 41},
	{ "G", 42},
	{ "Gbreve", 248},
	{ "H", 43},
	{ "I", 44},
	{ "Iacute", 204},
	{ "Icircumflex", 205},
	{ "Idieresis", 206},
	{ "Idotaccent", 250},
	{ "Igrave", 207},
	{ "J", 45},
	{ "K", 46},
	{ "L", 47},
	{ "Lslash", 226},
	{ "M", 48},
	{ "N", 49},
	{ "Ntilde", 102},
	{ "O", 50},
	{ "OE", 176},
	{ "Oacute", 208},
	{ "Ocircumflex", 209},
	{ "Odieresis", 103},
	{ "Ograve", 211},
	{ "Omega", 159},
	{ "Oslash", 145},
	{ "Otilde", 175},
	{ "P", 51},
	{ "Q", 52},
	{ "R", 53},
	{ "S", 54},
	{ "Scaron", 228},
	{ "Scedilla", 251},
	{ "T", 55},
	{ "Thorn", 237},
	{ "U", 56},
	{ "Uacute", 212},
	{ "Ucircumflex", 213},
	{ "Udieresis", 104},
	{ "Ugrave", 214},
	{ "V", 57},
	{ "W", 58},
	{ "X", 59},
	{ "Y", 60},
	{ "Yacute", 235},
	{ "Ydieresis", 187},
	{ "Z", 61},
	{ "Zcaron", 230},
	{ "a", 68},
	{ "aacute", 105},
	{ "acircumflex", 107},
	{ "acute", 141},
	{ "adieresis", 108},
	{ "ae", 160},
	{ "agrave", 106},
	{ "ampersand", 9},
	{ "apple", 210},
	{ "approxequal", 167},
	{ "aring", 110},
	{ "asciicircum", 65},
	{ "asciitilde", 97},
	{ "asterisk", 13},
	{ "at", 35},
	{ "atilde", 109},
	{ "b", 69},
	{ "backslash", 63},
	{ "bar", 95},
	{ "braceleft", 94},
	{ "braceright", 96},
	{ "bracketleft", 62},
	{ "bracketright", 64},
	{ "breve", 219},
	{ "brokenbar", 232},
	{ "bullet", 135},
	{ "c", 70},
	{ "cacute", 254},
	{ "caron", 225},
	{ "ccaron", 256},
	{ "ccedilla", 111},
	{ "cedilla", 222},
	{ "cent", 132},
	{ "circumflex", 216},
	{ "colon", 29},
	{ "comma", 15},
	{ "copyright", 139},
	{ "currency", 189},
	{ "d", 71},
	{ "dagger", 130},
	{ "daggerdbl", 194},
	{ "dcroat", 257},
	{ "degree", 131},
	{ "dieresis", 142},
	{ "divide", 184},
	{ "dollar", 7},
	{ "dotaccent", 220},
	{ "dotlessi", 215},
	{ "e", 72},
	{ "eacute", 112},
	{ "ecircumflex", 114},
	{ "edieresis", 115},
	{ "egrave", 113},
	{ "eight", 27},
	{ "ellipsis", 171},
	{ "emdash", 179},
	{ "endash", 178},
	{ "equal", 32},
	{ "eth", 234},
	{ "exclam", 4},
	{ "exclamdown", 163},
	{ "f", 73},
	{ "fi", 192},
	{ "five", 24},
	{ "fl", 193},
	{ "florin", 166},
	{ "four", 23},
	{ "fraction", 188},
	{ "franc", 247},
	{ "g", 74},
	{ "gbreve", 249},
	{ "germandbls", 137},
	{ "grave", 67},
	{ "greater", 33},
	{ "greaterequal", 149},
	{ "guillemotleft", 169},
	{ "guillemotright", 170},
	{ "guilsinglleft", 190},
	{ "guilsinglright", 191},
	{ "h", 75},
	{ "hungarumlaut", 223},
	{ "hyphen", 16},
	{ "i", 76},
	{ "iacute", 116},
	{ "icircumflex", 118},
	{ "idieresis", 119},
	{ "igrave", 117},
	{ "infinity", 146},
	{ "integral", 156},
	{ "j", 77},
	{ "k", 78},
	{ "l", 79},
	{ "less", 31},
	{ "lessequal", 148},
	{ "logicalnot", 164},
	{ "lozenge", 185},
	{ "lslash", 227},
	{ "m", 80},
	{ "macron", 218},
	{ "minus", 239},
	{ "mu", 151},
	{ "multiply", 240},
	{ "n", 81},
	{ "nine", 28},
	{ "nonbreakingspace", 172},
	{ "nonmarkingreturn", 2},
	{ "notequal", 143},
	{ "ntilde", 120},
	{ "numbersign", 6},
	{ "o", 82},
	{ "oacute", 121},
	{ "ocircumflex", 123},
	{ "odieresis", 124},
	{ "oe", 177},
	{ "ogonek", 224},
	{ "ograve", 122},
	{ "one", 20},
	{ "onehalf", 244},
	{ "onequarter", 245},
	{ "onesuperior", 241},
	{ "ordfeminine", 157},
	{ "ordmasculine", 158},
	{ "oslash", 161},
	{ "otilde", 125},
	{ "p", 83},
	{ "paragraph", 136},
	{ "parenleft", 11},
	{ "parenright", 12},
	{ "partialdiff", 152},
	{ "percent", 8},
	{ "period", 17},
	{ "periodcentered", 195},
	{ "perthousand", 198},
	{ "pi", 155},
	{ "plus", 14},
	{ "plusminus", 147},
	{ "product", 154},
	{ "q", 84},
	{ "question", 34},
	{ "questiondown", 162},
	{ "quotedbl", 5},
	{ "quotedblbase", 197},
	{ "quotedblleft", 180},
	{ "quotedblright", 181},
	{ "quoteleft", 182},
	{ "quoteright", 183},
	{ "quotesinglbase", 196},
	{ "quotesingle", 10},
	{ "r", 85},
	{ "radical", 165},
	{ "registered", 138},
	{ "ring", 221},
	{ "s", 86},
	{ "scaron", 229},
	{ "scedilla", 252},
	{ "section", 134},
	{ "semicolon", 30},
	{ "seven", 26},
	{ "six", 25},
	{ "slash", 18},
	{ "space", 3},
	{ "sterling", 133},
	{ "summation", 153},
	{ "t", 87},
	{ "thorn", 238},
	{ "three", 22},
	{ "threequarters", 246},
	{ "threesuperior", 243},
	{ "tilde", 217},
	{ "trademark", 140},
	{ "two", 21},
	{ "twosuperior", 242},
	{ "u", 88},
	{ "uacute", 126},
	{ "ucircumflex", 128},
	{ "udieresis", 129},
	{ "ugrave", 127},
	{ "underscore", 66},
	{ "v", 89},
	{ "w", 90},
	{ "x", 91},
	{ "y", 92},
	{ "yacute", 236},
	{ "ydieresis", 186},
	{ "yen", 150},
	{ "z", 93},
	{ "zcaron", 231},
	{ "zero", 19},
};

/*
	Look up a glyph name in the macroman table.

	Returns the standard index for the name, or -1 if the name is not
	one of the standard ones.
*/
static int
find_macroman_string(const char *s)
{
	int l, r, m;
	int comparison;

	/* l and r are both inclusive, so r must start on the last valid
	 * entry. Starting one beyond it lets m reach the element count,
	 * and a name sorting after the final entry would then be compared
	 * against memory past the end of the table. */
	l = 0;
	r = (int)(sizeof(macroman) / sizeof(macroman[0])) - 1;
	while (l <= r)
	{
		m = l + ((r - l) >> 1);
		comparison = strcmp(s, macroman[m].charname);
		if (comparison < 0)
			r = m - 1;
		else if (comparison > 0)
			l = m + 1;
		else
			return macroman[m].idx;
	}

	return -1;
}
| 1719 | |
| 1720 static size_t | |
| 1721 subset_post2(fz_context *ctx, ttf_t *ttf, uint8_t *d, size_t len, int *gids, int num_gids) | |
| 1722 { | |
| 1723 int i, n, new_glyphs, old_strings, new_strings; | |
| 1724 int j; | |
| 1725 fz_int2_heap heap = { 0 }; | |
| 1726 uint8_t *d0, *e, *p; | |
| 1727 | |
| 1728 if (len < (size_t) 2 + 2 * ttf->orig_num_glyphs) | |
| 1729 fz_throw(ctx, FZ_ERROR_FORMAT, "Truncated post table"); | |
| 1730 | |
| 1731 n = get16(d); | |
| 1732 if ((uint32_t)n != ttf->orig_num_glyphs) | |
| 1733 fz_throw(ctx, FZ_ERROR_FORMAT, "Malformed post table"); | |
| 1734 | |
| 1735 d0 = d; | |
| 1736 d += 2; len -= 2; | |
| 1737 e = d; | |
| 1738 p = d; | |
| 1739 | |
| 1740 /* Store all kept indexes. */ | |
| 1741 if (len < (size_t)n*2) | |
| 1742 fz_throw(ctx, FZ_ERROR_FORMAT, "Malformed post table"); | |
| 1743 old_strings = 0; | |
| 1744 new_strings = 0; | |
| 1745 new_glyphs = 0; | |
| 1746 j = 0; | |
| 1747 len -= (size_t)n*2; | |
| 1748 for (i = 0; i < n; i++) | |
| 1749 { | |
| 1750 uint16_t o = get16(d); | |
| 1751 fz_int2 i2; | |
| 1752 p += 2; | |
| 1753 | |
| 1754 if (o >= 258) | |
| 1755 old_strings++; | |
| 1756 | |
| 1757 /* We're only keeping gids we want. */ | |
| 1758 /* Note we need to keep both the gids we were given by the caller, but also | |
| 1759 * those required as composites (in gid_renum, if we have it). */ | |
| 1760 if (i != 0 && (j >= num_gids || gids[j] != i) && (ttf->gid_renum == NULL || ttf->gid_renum[i] == 0)) | |
| 1761 { | |
| 1762 memmove(d, d + 2, (n - i - 1) * 2); | |
| 1763 continue; | |
| 1764 } | |
| 1765 if (j < num_gids && gids[j] == i) | |
| 1766 j++; | |
| 1767 | |
| 1768 d += 2; | |
| 1769 e += 2; | |
| 1770 | |
| 1771 /* We want this gid. */ | |
| 1772 new_glyphs++; | |
| 1773 | |
| 1774 /* 257 or smaller: same as in the basic order, keep it as such. */ | |
| 1775 if (o <= 257) | |
| 1776 continue; | |
| 1777 | |
| 1778 /* check if string is one of the macroman standard ones, and use its index if so. */ | |
| 1779 { | |
| 1780 uint8_t *q = d0 + 2 + (size_t) n * 2; | |
| 1781 int k; | |
| 1782 char buf[257] = { 0 }; | |
| 1783 int macidx; | |
| 1784 for (k = 0; k < o - 258; k++) | |
| 1785 q += 1 + *q; | |
| 1786 for (k = 0; k < *q; k++) | |
| 1787 buf[k] = *(q + 1 + k); | |
| 1788 | |
| 1789 macidx = find_macroman_string(buf); | |
| 1790 | |
| 1791 if (macidx >= 0) | |
| 1792 { | |
| 1793 put16(d - 2, macidx); | |
| 1794 continue; | |
| 1795 } | |
| 1796 } | |
| 1797 | |
| 1798 /* We want this gid, and it is a string. */ | |
| 1799 new_strings++; | |
| 1800 | |
| 1801 /* Store the index. */ | |
| 1802 i2.a = o - 258; | |
| 1803 i2.b = i; | |
| 1804 fz_int2_heap_insert(ctx, &heap, i2); | |
| 1805 | |
| 1806 /* Update string index value in table entry. */ | |
| 1807 put16(d - 2, 257 + new_strings); | |
| 1808 } | |
| 1809 | |
| 1810 d = p; | |
| 1811 | |
| 1812 /* Update number of indexes */ | |
| 1813 put16(d0, new_glyphs); | |
| 1814 | |
| 1815 fz_int2_heap_sort(ctx, &heap); | |
| 1816 | |
| 1817 /* So, the heap is sorted on i2.a (the string indexes we want to keep), | |
| 1818 * and i2.b is the gid that refers to that index. */ | |
| 1819 | |
| 1820 /* Run through the list moving the strings down that we care about. */ | |
| 1821 j = 0; | |
| 1822 n = old_strings; | |
| 1823 for (i = 0; i < n; i++) | |
| 1824 { | |
| 1825 uint8_t slen; | |
| 1826 | |
| 1827 if (len < 1) | |
| 1828 fz_throw(ctx, FZ_ERROR_FORMAT, "Malformed post table"); | |
| 1829 slen = *d+1; | |
| 1830 if (len < slen) | |
| 1831 fz_throw(ctx, FZ_ERROR_FORMAT, "Malformed post table"); | |
| 1832 len -= slen; | |
| 1833 | |
| 1834 if (j >= heap.len || heap.heap[j].a != i) | |
| 1835 { | |
| 1836 /* Drop this one. */ | |
| 1837 d += slen; | |
| 1838 continue; | |
| 1839 } | |
| 1840 | |
| 1841 memmove(e, d, slen); | |
| 1842 d += slen; | |
| 1843 e += slen; | |
| 1844 | |
| 1845 j++; | |
| 1846 } | |
| 1847 | |
| 1848 fz_free(ctx, heap.heap); | |
| 1849 | |
| 1850 return e - d0; | |
| 1851 } | |
| 1852 | |
| 1853 static void | |
| 1854 subset_post(fz_context *ctx, ttf_t *ttf, fz_stream *stm, int *gids, int num_gids) | |
| 1855 { | |
| 1856 fz_buffer *t = read_table(ctx, stm, TAG("post"), 0); | |
| 1857 uint8_t *d; | |
| 1858 size_t len; | |
| 1859 uint32_t fmt; | |
| 1860 | |
| 1861 if (t == NULL) | |
| 1862 return; | |
| 1863 | |
| 1864 d = t->data; | |
| 1865 len = t->len; | |
| 1866 | |
| 1867 if (len < 32) | |
| 1868 { | |
| 1869 fz_drop_buffer(ctx, t); | |
| 1870 fz_throw(ctx, FZ_ERROR_FORMAT, "Truncated post table"); | |
| 1871 } | |
| 1872 | |
| 1873 fmt = get32(d); | |
| 1874 | |
| 1875 if (fmt != 0x00020000) | |
| 1876 { | |
| 1877 /* Fmt 1: Nothing to be gained by having this table. The cmap should | |
| 1878 * have all the mappings anyway, and we'll have broken it by renumbering | |
| 1879 * the gids down anyway. */ | |
| 1880 /* Fmt 2.5 deprecated. */ | |
| 1881 /* Fmt 3 and 4: should not be used for PDF. */ | |
| 1882 /* No other formats defined. */ | |
| 1883 fz_drop_buffer(ctx, t); | |
| 1884 return; | |
| 1885 } | |
| 1886 d += 32; len -= 32; | |
| 1887 fz_try(ctx) | |
| 1888 len = subset_post2(ctx, ttf, d, len, gids, num_gids); | |
| 1889 fz_catch(ctx) | |
| 1890 { | |
| 1891 fz_drop_buffer(ctx, t); | |
| 1892 fz_rethrow(ctx); | |
| 1893 } | |
| 1894 | |
| 1895 t->len = 32 + len; | |
| 1896 | |
| 1897 add_table(ctx, ttf, TAG("post"), t); | |
| 1898 } | |
| 1899 | |
| 1900 static void | |
| 1901 subset_CFF(fz_context *ctx, ttf_t *ttf, fz_stream *stm, int *gids, int num_gids, int symbolic, int cidfont) | |
| 1902 { | |
| 1903 fz_buffer *t = read_table(ctx, stm, TAG("CFF "), 1); | |
| 1904 fz_buffer *sub = NULL; | |
| 1905 | |
| 1906 fz_var(sub); | |
| 1907 | |
| 1908 fz_try(ctx) | |
| 1909 sub = fz_subset_cff_for_gids(ctx, t, gids, num_gids, symbolic, cidfont); | |
| 1910 fz_always(ctx) | |
| 1911 fz_drop_buffer(ctx, t); | |
| 1912 fz_catch(ctx) | |
| 1913 fz_rethrow(ctx); | |
| 1914 | |
| 1915 add_table(ctx, ttf, TAG("CFF "), sub); | |
| 1916 } | |
| 1917 | |
| 1918 fz_buffer * | |
| 1919 fz_subset_ttf_for_gids(fz_context *ctx, fz_buffer *orig, int *gids, int num_gids, int symbolic, int cidfont) | |
| 1920 { | |
| 1921 fz_stream *stm = fz_open_buffer(ctx, orig); | |
| 1922 ttf_t ttf = { 0 }; | |
| 1923 fz_buffer *newbuf = NULL; | |
| 1924 fz_output *out = NULL; | |
| 1925 | |
| 1926 fz_var(newbuf); | |
| 1927 fz_var(out); | |
| 1928 | |
| 1929 fz_try(ctx) | |
| 1930 { | |
| 1931 ttf.is_otf = (fz_read_uint32_le(ctx, stm) == 0x4f54544f); | |
| 1932 ttf.symbolic = symbolic; | |
| 1933 | |
| 1934 /* Subset the name table. No other dependencies. */ | |
| 1935 subset_name_table(ctx, &ttf, stm); | |
| 1936 | |
| 1937 if (!cidfont) | |
| 1938 { | |
| 1939 /* Load the encoding. Populates the encoding table from the cmap table | |
| 1940 * in the original. cmap table is then discarded. */ | |
| 1941 load_encoding(ctx, &ttf, stm); | |
| 1942 | |
| 1943 /* Blank out the bits of the encoding we don't need. */ | |
| 1944 reduce_encoding(ctx, &ttf, gids, num_gids); | |
| 1945 } | |
| 1946 | |
| 1947 /* Read maxp and store the table. Remember orig_num_glyphs. */ | |
| 1948 read_maxp(ctx, &ttf, stm); | |
| 1949 | |
| 1950 /* Read head and store the table. Remember the loca index size. */ | |
| 1951 read_head(ctx, &ttf, stm); | |
| 1952 | |
| 1953 if (ttf.is_otf) | |
| 1954 { | |
| 1955 subset_CFF(ctx, &ttf, stm, gids, num_gids, symbolic, cidfont); | |
| 1956 } | |
| 1957 | |
| 1958 /* Read loca and store it. Stash a pointer to the table for quick access. */ | |
| 1959 if (!ttf.is_otf) | |
| 1960 { | |
| 1961 read_loca(ctx, &ttf, stm); | |
| 1962 | |
| 1963 /* Read the glyf data, and scan it for composites. This makes the gid_renum table, | |
| 1964 * subsets the glyf data, and rewrites the loca table. */ | |
| 1965 read_glyf(ctx, &ttf, stm, gids, num_gids); | |
| 1966 } | |
| 1967 | |
| 1968 /* Read hhea and store it. Remember numOfLongHorMetrics. */ | |
| 1969 read_hhea(ctx, &ttf, stm); | |
| 1970 | |
| 1971 /* Read and subset hmtx. */ | |
| 1972 subset_hmtx(ctx, &ttf, stm); | |
| 1973 | |
| 1974 #ifdef DEBUG_SUBSETTING | |
| 1975 if (!cidfont) | |
| 1976 { | |
| 1977 encoding_t *enc = ttf.encoding; | |
| 1978 uint32_t i, n = enc->max; | |
| 1979 | |
| 1980 for (i = 0; i < n; i++) | |
| 1981 if (enc->gid[i]) | |
| 1982 printf("cid %x '%c'-> orig gid %d -> gid %d\n", i, (char)i, enc->gid[i], ttf.gid_renum[enc->gid[i]]); | |
| 1983 } | |
| 1984 { | |
| 1985 uint32_t i; | |
| 1986 | |
| 1987 for (i = 0; i < ttf.orig_num_glyphs; i++) | |
| 1988 if (ttf.gid_renum[i]) | |
| 1989 printf("gid %d -> %d\n", i, ttf.gid_renum[i]); | |
| 1990 | |
| 1991 for (i = 0; i <= ttf.new_num_glyphs; i++) | |
| 1992 printf("LOCA %d = %x\n", i, get_loca(ctx, &ttf, i)); | |
| 1993 } | |
| 1994 #endif | |
| 1995 if (!ttf.is_otf) | |
| 1996 { | |
| 1997 shrink_loca_if_possible(ctx, &ttf); | |
| 1998 | |
| 1999 update_num_glyphs(ctx, &ttf); | |
| 2000 } | |
| 2001 | |
| 2002 if (!cidfont) | |
| 2003 { | |
| 2004 /* Now we can make the new cmap. */ | |
| 2005 make_cmap(ctx, &ttf); | |
| 2006 } | |
| 2007 | |
| 2008 if (!cidfont) | |
| 2009 { | |
| 2010 /* subset the post table */ | |
| 2011 subset_post(ctx, &ttf, stm, gids, num_gids); | |
| 2012 } | |
| 2013 | |
| 2014 copy_table(ctx, &ttf, stm, TAG("OS/2"), 0); | |
| 2015 copy_table(ctx, &ttf, stm, TAG("cvt "), 0); | |
| 2016 copy_table(ctx, &ttf, stm, TAG("fpgm"), 0); | |
| 2017 copy_table(ctx, &ttf, stm, TAG("prep"), 0); | |
| 2018 | |
| 2019 sort_tables(ctx, &ttf); | |
| 2020 checksum_tables(ctx, &ttf); | |
| 2021 | |
| 2022 newbuf = fz_new_buffer(ctx, 1024); | |
| 2023 out = fz_new_output_with_buffer(ctx, newbuf); | |
| 2024 | |
| 2025 write_tables(ctx, &ttf, out); | |
| 2026 | |
| 2027 fz_close_output(ctx, out); | |
| 2028 | |
| 2029 fix_checksum(ctx, newbuf); | |
| 2030 } | |
| 2031 fz_always(ctx) | |
| 2032 { | |
| 2033 int i; | |
| 2034 | |
| 2035 fz_drop_output(ctx, out); | |
| 2036 fz_drop_stream(ctx, stm); | |
| 2037 for (i = 0; i < ttf.len; i++) | |
| 2038 fz_drop_buffer(ctx, ttf.table[i].tab); | |
| 2039 fz_free(ctx, ttf.table); | |
| 2040 fz_free(ctx, ttf.gid_renum); | |
| 2041 fz_free(ctx, ttf.encoding); | |
| 2042 } | |
| 2043 fz_catch(ctx) | |
| 2044 { | |
| 2045 fz_drop_buffer(ctx, newbuf); | |
| 2046 fz_rethrow(ctx); | |
| 2047 } | |
| 2048 | |
| 2049 return newbuf; | |
| 2050 } |
