comparison mupdf-source/source/xps/xps-glyphs.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 // Copyright (C) 2004-2021 Artifex Software, Inc.
2 //
3 // This file is part of MuPDF.
4 //
5 // MuPDF is free software: you can redistribute it and/or modify it under the
6 // terms of the GNU Affero General Public License as published by the Free
7 // Software Foundation, either version 3 of the License, or (at your option)
8 // any later version.
9 //
10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13 // details.
14 //
15 // You should have received a copy of the GNU Affero General Public License
16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17 //
18 // Alternative licensing terms are available from the licensor.
19 // For commercial licensing, see <https://www.artifex.com/> or contact
20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21 // CA 94129, USA, for further information.
22
#include "mupdf/fitz.h"
#include "xps-imp.h"

#include <limits.h>

#include <ft2build.h>
#include FT_FREETYPE_H
#include FT_ADVANCES_H
29
/* Return 1 if a is an ASCII hexadecimal digit (0-9, a-f, A-F), otherwise 0. */
static inline int ishex(int a)
{
	if (a >= '0' && a <= '9')
		return 1;
	if (a >= 'a' && a <= 'f')
		return 1;
	if (a >= 'A' && a <= 'F')
		return 1;
	return 0;
}
36
/*
 * Convert one ASCII hexadecimal digit to its value (0..15).
 * Any other input yields 0.
 */
static inline int unhex(int a)
{
	if (a >= '0' && a <= '9')
		return a - '0';
	if (a >= 'a' && a <= 'f')
		return 10 + (a - 'a');
	if (a >= 'A' && a <= 'F')
		return 10 + (a - 'A');
	return 0;
}
44
45 int
46 xps_count_font_encodings(fz_context *ctx, fz_font *font)
47 {
48 FT_Face face = fz_font_ft_face(ctx, font);
49 return face->num_charmaps;
50 }
51
52 void
53 xps_identify_font_encoding(fz_context *ctx, fz_font *font, int idx, int *pid, int *eid)
54 {
55 FT_Face face = fz_font_ft_face(ctx, font);
56 *pid = face->charmaps[idx]->platform_id;
57 *eid = face->charmaps[idx]->encoding_id;
58 }
59
60 void
61 xps_select_font_encoding(fz_context *ctx, fz_font *font, int idx)
62 {
63 FT_Face face = fz_font_ft_face(ctx, font);
64 fz_ft_lock(ctx);
65 FT_Set_Charmap(face, face->charmaps[idx]);
66 fz_ft_unlock(ctx);
67 }
68
69 int
70 xps_encode_font_char(fz_context *ctx, fz_font *font, int code)
71 {
72 FT_Face face = fz_font_ft_face(ctx, font);
73 int gid;
74 fz_ft_lock(ctx);
75 gid = FT_Get_Char_Index(face, code);
76 if (gid == 0 && face->charmap && face->charmap->platform_id == 3 && face->charmap->encoding_id == 0)
77 gid = FT_Get_Char_Index(face, 0xF000 | code);
78 fz_ft_unlock(ctx);
79 return gid;
80 }
81
82 void
83 xps_measure_font_glyph(fz_context *ctx, xps_document *doc, fz_font *font, int gid, xps_glyph_metrics *mtx)
84 {
85 int mask = FT_LOAD_NO_SCALE | FT_LOAD_IGNORE_TRANSFORM;
86 FT_Face face = fz_font_ft_face(ctx, font);
87 FT_Fixed hadv = 0, vadv = 0;
88
89 fz_ft_lock(ctx);
90 FT_Get_Advance(face, gid, mask, &hadv);
91 FT_Get_Advance(face, gid, mask | FT_LOAD_VERTICAL_LAYOUT, &vadv);
92 fz_ft_unlock(ctx);
93
94 mtx->hadv = (float) hadv / face->units_per_EM;
95 mtx->vadv = (float) vadv / face->units_per_EM;
96 mtx->vorg = (float) face->ascender / face->units_per_EM;
97 }
98
99 static fz_font *
100 xps_lookup_font_imp(fz_context *ctx, xps_document *doc, char *name)
101 {
102 xps_font_cache *cache;
103 for (cache = doc->font_table; cache; cache = cache->next)
104 if (!xps_strcasecmp(cache->name, name))
105 return fz_keep_font(ctx, cache->font);
106 return NULL;
107 }
108
109 static void
110 xps_insert_font(fz_context *ctx, xps_document *doc, char *name, fz_font *font)
111 {
112 xps_font_cache *cache = fz_malloc_struct(ctx, xps_font_cache);
113 cache->font = NULL;
114 cache->name = NULL;
115
116 fz_try(ctx)
117 {
118 cache->font = fz_keep_font(ctx, font);
119 cache->name = fz_strdup(ctx, name);
120 cache->next = doc->font_table;
121 }
122 fz_catch(ctx)
123 {
124 fz_drop_font(ctx, cache->font);
125 fz_free(ctx, cache->name);
126 fz_free(ctx, cache);
127 fz_rethrow(ctx);
128 }
129
130 doc->font_table = cache;
131 }
132
133 /*
134 * Some fonts in XPS are obfuscated by XOR:ing the first 32 bytes of the
135 * data with the GUID in the fontname.
136 */
137 static void
138 xps_deobfuscate_font_resource(fz_context *ctx, xps_document *doc, xps_part *part)
139 {
140 unsigned char buf[33];
141 unsigned char key[16];
142 unsigned char *data;
143 size_t size;
144 char *p;
145 int i;
146
147 size = fz_buffer_storage(ctx, part->data, &data);
148 if (size < 32)
149 {
150 fz_warn(ctx, "insufficient data for font deobfuscation");
151 return;
152 }
153
154 p = strrchr(part->name, '/');
155 if (!p)
156 p = part->name;
157
158 for (i = 0; i < 32 && *p; p++)
159 {
160 if (ishex(*p))
161 buf[i++] = *p;
162 }
163 buf[i] = 0;
164
165 if (i != 32)
166 {
167 fz_warn(ctx, "cannot extract GUID from obfuscated font part name");
168 return;
169 }
170
171 for (i = 0; i < 16; i++)
172 key[i] = unhex(buf[i*2+0]) * 16 + unhex(buf[i*2+1]);
173
174 for (i = 0; i < 16; i++)
175 {
176 data[i] ^= key[15-i];
177 data[i+16] ^= key[15-i];
178 }
179 }
180
181 static void
182 xps_select_best_font_encoding(fz_context *ctx, xps_document *doc, fz_font *font)
183 {
184 static struct { int pid, eid; } xps_cmap_list[] =
185 {
186 { 3, 10 }, /* Unicode with surrogates */
187 { 3, 1 }, /* Unicode without surrogates */
188 { 3, 5 }, /* Wansung */
189 { 3, 4 }, /* Big5 */
190 { 3, 3 }, /* Prc */
191 { 3, 2 }, /* ShiftJis */
192 { 3, 0 }, /* Symbol */
193 { 1, 0 },
194 { -1, -1 },
195 };
196
197 int i, k, n, pid, eid;
198
199 n = xps_count_font_encodings(ctx, font);
200 for (k = 0; xps_cmap_list[k].pid != -1; k++)
201 {
202 for (i = 0; i < n; i++)
203 {
204 xps_identify_font_encoding(ctx, font, i, &pid, &eid);
205 if (pid == xps_cmap_list[k].pid && eid == xps_cmap_list[k].eid)
206 {
207 xps_select_font_encoding(ctx, font, i);
208 return;
209 }
210 }
211 }
212
213 fz_warn(ctx, "cannot find a suitable cmap");
214 }
215
216 fz_font *
217 xps_lookup_font(fz_context *ctx, xps_document *doc, char *base_uri, char *font_uri, char *style_att)
218 {
219 char partname[1024];
220 char fakename[1024];
221 char *subfont;
222 int subfontid = 0;
223 xps_part *part;
224 fz_font *font;
225
226 xps_resolve_url(ctx, doc, partname, base_uri, font_uri, sizeof partname);
227 subfont = strrchr(partname, '#');
228 if (subfont)
229 {
230 subfontid = atoi(subfont + 1);
231 *subfont = 0;
232 }
233
234 /* Make a new part name for font with style simulation applied */
235 fz_strlcpy(fakename, partname, sizeof fakename);
236 if (style_att)
237 {
238 if (!strcmp(style_att, "BoldSimulation"))
239 fz_strlcat(fakename, "#Bold", sizeof fakename);
240 else if (!strcmp(style_att, "ItalicSimulation"))
241 fz_strlcat(fakename, "#Italic", sizeof fakename);
242 else if (!strcmp(style_att, "BoldItalicSimulation"))
243 fz_strlcat(fakename, "#BoldItalic", sizeof fakename);
244 }
245
246 font = xps_lookup_font_imp(ctx, doc, fakename);
247 if (!font)
248 {
249 fz_buffer *buf = NULL;
250 fz_var(buf);
251
252 fz_try(ctx)
253 {
254 part = xps_read_part(ctx, doc, partname);
255 }
256 fz_catch(ctx)
257 {
258 if (fz_caught(ctx) == FZ_ERROR_TRYLATER)
259 {
260 if (doc->cookie)
261 {
262 doc->cookie->incomplete = 1;
263 fz_ignore_error(ctx);
264 }
265 else
266 fz_rethrow(ctx);
267 }
268 else
269 {
270 fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
271 fz_report_error(ctx);
272 fz_warn(ctx, "cannot find font resource part '%s'", partname);
273 }
274 return NULL;
275 }
276
277 /* deobfuscate if necessary */
278 if (strstr(part->name, ".odttf"))
279 xps_deobfuscate_font_resource(ctx, doc, part);
280 if (strstr(part->name, ".ODTTF"))
281 xps_deobfuscate_font_resource(ctx, doc, part);
282
283 fz_var(font);
284 fz_try(ctx)
285 {
286 font = fz_new_font_from_buffer(ctx, NULL, part->data, subfontid, 1);
287 xps_select_best_font_encoding(ctx, doc, font);
288 xps_insert_font(ctx, doc, fakename, font);
289 }
290 fz_always(ctx)
291 {
292 xps_drop_part(ctx, doc, part);
293 }
294 fz_catch(ctx)
295 {
296 fz_drop_font(ctx, font);
297 fz_warn(ctx, "cannot load font resource '%s'", partname);
298 return NULL;
299 }
300
301 if (style_att)
302 {
303 fz_font_flags_t *flags = fz_font_flags(font);
304 int bold = !!strstr(style_att, "Bold");
305 int italic = !!strstr(style_att, "Italic");
306 flags->fake_bold = bold;
307 flags->is_bold = bold;
308 flags->fake_italic = italic;
309 flags->is_italic = italic;
310 }
311 }
312 return font;
313 }
314
315 /*
316 * Parse and draw an XPS <Glyphs> element.
317 *
318 * Indices syntax:
319
320 GlyphIndices = GlyphMapping ( ";" GlyphMapping )
321 GlyphMapping = ( [ClusterMapping] GlyphIndex ) [GlyphMetrics]
322 ClusterMapping = "(" ClusterCodeUnitCount [":" ClusterGlyphCount] ")"
323 ClusterCodeUnitCount = * DIGIT
324 ClusterGlyphCount = * DIGIT
325 GlyphIndex = * DIGIT
326 GlyphMetrics = "," AdvanceWidth ["," uOffset ["," vOffset]]
327 AdvanceWidth = ["+"] RealNum
328 uOffset = ["+" | "-"] RealNum
329 vOffset = ["+" | "-"] RealNum
330 RealNum = ((DIGIT ["." DIGIT]) | ("." DIGIT)) [Exponent]
331 Exponent = ( ("E"|"e") ("+"|"-") DIGIT )
332
333 */
334
/*
 * Parse a run of ASCII decimal digits starting at s.
 *
 * *digit receives the value, or 0 if s does not start with a digit.
 * Values too large for an int saturate at INT_MAX instead of
 * overflowing (signed overflow is undefined behaviour, and the input
 * comes straight from the document).
 *
 * Returns a pointer to the first character after the digits.
 */
static char *
xps_parse_digits(char *s, int *digit)
{
	int value = 0;

	while (*s >= '0' && *s <= '9')
	{
		int d = *s - '0';

		/* value * 10 + d fits exactly when value <= (INT_MAX - d) / 10 */
		if (value > (INT_MAX - d) / 10)
			value = INT_MAX;
		else
			value = value * 10 + d;
		s++;
	}

	*digit = value;
	return s;
}
346
/*
 * Parse a real number at s with fz_strtof.
 *
 * If a number was parsed, *override is set to 1 and *number receives
 * the value. Otherwise *override is set to 0 and *number is left
 * unchanged. Returns the end pointer reported by fz_strtof.
 */
static char *
xps_parse_real_num(char *s, float *number, int *override)
{
	char *end;
	float parsed = fz_strtof(s, &end);

	if (end == s)
	{
		*override = 0;
		return end;
	}

	*override = 1;
	*number = parsed;
	return end;
}
358
/*
 * Parse an optional cluster mapping: "(" code_count [":" glyph_count] ")".
 *
 * Each piece is checked on its own, so a missing piece is simply
 * skipped and the matching output is left unchanged. Returns a pointer
 * to the first character that was not consumed.
 */
static char *
xps_parse_cluster_mapping(char *s, int *code_count, int *glyph_count)
{
	if (s[0] == '(')
		s = xps_parse_digits(&s[1], code_count);

	if (s[0] == ':')
		s = xps_parse_digits(&s[1], glyph_count);

	return s[0] == ')' ? s + 1 : s;
}
370
/*
 * Parse an optional glyph index (a run of decimal digits).
 *
 * If s does not start with a digit, *glyph_index is left unchanged and
 * s is returned as is; otherwise the position after the digits is
 * returned.
 */
static char *
xps_parse_glyph_index(char *s, int *glyph_index)
{
	if (*s < '0' || *s > '9')
		return s;

	return xps_parse_digits(s, glyph_index);
}
378
/*
 * Parse optional glyph metrics: "," advance ["," uofs ["," vofs]].
 *
 * Each output is written only if a number is present in its field; an
 * empty field leaves the caller's value in place. An advance that was
 * given explicitly is negated when bidi_level is odd.
 * Returns a pointer to the first character that was not consumed.
 */
static char *
xps_parse_glyph_metrics(char *s, float *advance, float *uofs, float *vofs, int bidi_level)
{
	int given;

	if (*s == ',')
	{
		s = xps_parse_real_num(s + 1, advance, &given);
		if ((bidi_level & 1) && given)
			*advance = -*advance;
	}

	if (*s == ',')
	{
		s = xps_parse_real_num(s + 1, uofs, &given);
	}

	if (*s == ',')
	{
		s = xps_parse_real_num(s + 1, vofs, &given);
	}

	return s;
}
395
396 fz_text *
397 xps_parse_glyphs_imp(fz_context *ctx, xps_document *doc, fz_matrix ctm,
398 fz_font *font, float size, float originx, float originy,
399 int is_sideways, int bidi_level,
400 char *indices, char *unicode)
401 {
402 xps_glyph_metrics mtx;
403 fz_text *text;
404 fz_matrix tm;
405 float x = originx;
406 float y = originy;
407 char *us = unicode;
408 char *is = indices;
409 size_t un = 0;
410
411 if (!unicode && !indices)
412 fz_warn(ctx, "glyphs element with neither characters nor indices");
413
414 if (us)
415 {
416 if (us[0] == '{' && us[1] == '}')
417 us = us + 2;
418 un = strlen(us);
419 }
420
421 if (is_sideways)
422 tm = fz_pre_scale(fz_rotate(90), -size, size);
423 else
424 tm = fz_scale(size, -size);
425
426 text = fz_new_text(ctx);
427
428 fz_try(ctx)
429 {
430 while ((us && un > 0) || (is && *is))
431 {
432 int char_code = FZ_REPLACEMENT_CHARACTER;
433 int code_count = 1;
434 int glyph_count = 1;
435
436 if (is && *is)
437 {
438 is = xps_parse_cluster_mapping(is, &code_count, &glyph_count);
439 }
440
441 if (code_count < 1)
442 code_count = 1;
443 if (glyph_count < 1)
444 glyph_count = 1;
445
446 /* TODO: add code chars with cluster mappings for text extraction */
447
448 while (code_count--)
449 {
450 if (us && un > 0)
451 {
452 int t = fz_chartorune(&char_code, us);
453 us += t; un -= t;
454 }
455 }
456
457 while (glyph_count--)
458 {
459 int glyph_index = -1;
460 float u_offset = 0;
461 float v_offset = 0;
462 float advance;
463 int dir;
464
465 if (is && *is)
466 is = xps_parse_glyph_index(is, &glyph_index);
467
468 if (glyph_index == -1)
469 glyph_index = xps_encode_font_char(ctx, font, char_code);
470
471 xps_measure_font_glyph(ctx, doc, font, glyph_index, &mtx);
472 if (is_sideways)
473 advance = mtx.vadv * 100;
474 else if (bidi_level & 1)
475 advance = -mtx.hadv * 100;
476 else
477 advance = mtx.hadv * 100;
478
479 if (fz_font_flags(font)->fake_bold)
480 advance *= 1.02f;
481
482 if (is && *is)
483 {
484 is = xps_parse_glyph_metrics(is, &advance, &u_offset, &v_offset, bidi_level);
485 if (*is == ';')
486 is ++;
487 }
488
489 if (bidi_level & 1)
490 u_offset = -mtx.hadv * 100 - u_offset;
491
492 u_offset = u_offset * 0.01f * size;
493 v_offset = v_offset * 0.01f * size;
494
495 if (is_sideways)
496 {
497 tm.e = x + u_offset + (mtx.vorg * size);
498 tm.f = y - v_offset + (mtx.hadv * 0.5f * size);
499 }
500 else
501 {
502 tm.e = x + u_offset;
503 tm.f = y - v_offset;
504 }
505
506 dir = bidi_level & 1 ? FZ_BIDI_RTL : FZ_BIDI_LTR;
507 fz_show_glyph(ctx, text, font, tm, glyph_index, char_code, is_sideways, bidi_level, dir, FZ_LANG_UNSET);
508
509 x += advance * 0.01f * size;
510 }
511 }
512 }
513 fz_catch(ctx)
514 {
515 fz_drop_text(ctx, text);
516 fz_rethrow(ctx);
517 }
518
519 return text;
520 }
521
522 void
523 xps_parse_glyphs(fz_context *ctx, xps_document *doc, fz_matrix ctm,
524 char *base_uri, xps_resource *dict, fz_xml *root)
525 {
526 fz_device *dev = doc->dev;
527
528 fz_xml *node;
529
530 char *fill_uri;
531 char *opacity_mask_uri;
532
533 char *bidi_level_att;
534 char *fill_att;
535 char *font_size_att;
536 char *font_uri_att;
537 char *origin_x_att;
538 char *origin_y_att;
539 char *is_sideways_att;
540 char *indices_att;
541 char *unicode_att;
542 char *style_att;
543 char *transform_att;
544 char *clip_att;
545 char *opacity_att;
546 char *opacity_mask_att;
547
548 fz_xml *transform_tag = NULL;
549 fz_xml *clip_tag = NULL;
550 fz_xml *fill_tag = NULL;
551 fz_xml *opacity_mask_tag = NULL;
552
553 char *fill_opacity_att = NULL;
554
555 fz_font *font;
556
557 float font_size = 10;
558 int is_sideways = 0;
559 int bidi_level = 0;
560
561 fz_text *text = NULL;
562 fz_rect area;
563
564 /*
565 * Extract attributes and extended attributes.
566 */
567
568 bidi_level_att = fz_xml_att(root, "BidiLevel");
569 fill_att = fz_xml_att(root, "Fill");
570 font_size_att = fz_xml_att(root, "FontRenderingEmSize");
571 font_uri_att = fz_xml_att(root, "FontUri");
572 origin_x_att = fz_xml_att(root, "OriginX");
573 origin_y_att = fz_xml_att(root, "OriginY");
574 is_sideways_att = fz_xml_att(root, "IsSideways");
575 indices_att = fz_xml_att(root, "Indices");
576 unicode_att = fz_xml_att(root, "UnicodeString");
577 style_att = fz_xml_att(root, "StyleSimulations");
578 transform_att = fz_xml_att(root, "RenderTransform");
579 clip_att = fz_xml_att(root, "Clip");
580 opacity_att = fz_xml_att(root, "Opacity");
581 opacity_mask_att = fz_xml_att(root, "OpacityMask");
582
583 for (node = fz_xml_down(root); node; node = fz_xml_next(node))
584 {
585 if (fz_xml_is_tag(node, "Glyphs.RenderTransform"))
586 transform_tag = fz_xml_down(node);
587 if (fz_xml_is_tag(node, "Glyphs.OpacityMask"))
588 opacity_mask_tag = fz_xml_down(node);
589 if (fz_xml_is_tag(node, "Glyphs.Clip"))
590 clip_tag = fz_xml_down(node);
591 if (fz_xml_is_tag(node, "Glyphs.Fill"))
592 fill_tag = fz_xml_down(node);
593 }
594
595 fill_uri = base_uri;
596 opacity_mask_uri = base_uri;
597
598 xps_resolve_resource_reference(ctx, doc, dict, &transform_att, &transform_tag, NULL);
599 xps_resolve_resource_reference(ctx, doc, dict, &clip_att, &clip_tag, NULL);
600 xps_resolve_resource_reference(ctx, doc, dict, &fill_att, &fill_tag, &fill_uri);
601 xps_resolve_resource_reference(ctx, doc, dict, &opacity_mask_att, &opacity_mask_tag, &opacity_mask_uri);
602
603 /*
604 * Check that we have all the necessary information.
605 */
606
607 if (!font_size_att || !font_uri_att || !origin_x_att || !origin_y_att) {
608 fz_warn(ctx, "missing attributes in glyphs element");
609 return;
610 }
611
612 if (!indices_att && !unicode_att)
613 return; /* nothing to draw */
614
615 if (is_sideways_att)
616 is_sideways = !strcmp(is_sideways_att, "true");
617
618 if (bidi_level_att)
619 bidi_level = atoi(bidi_level_att);
620
621 /*
622 * Find and load the font resource.
623 */
624
625 font = xps_lookup_font(ctx, doc, base_uri, font_uri_att, style_att);
626 if (!font)
627 font = fz_new_base14_font(ctx, "Times-Roman");
628
629 fz_var(text);
630
631 fz_try(ctx)
632 {
633 /*
634 * Set up graphics state.
635 */
636
637 ctm = xps_parse_transform(ctx, doc, transform_att, transform_tag, ctm);
638
639 if (clip_att || clip_tag)
640 xps_clip(ctx, doc, ctm, dict, clip_att, clip_tag);
641
642 font_size = fz_atof(font_size_att);
643
644 text = xps_parse_glyphs_imp(ctx, doc, ctm, font, font_size,
645 fz_atof(origin_x_att), fz_atof(origin_y_att),
646 is_sideways, bidi_level, indices_att, unicode_att);
647
648 area = fz_bound_text(ctx, text, NULL, ctm);
649
650 xps_begin_opacity(ctx, doc, ctm, area, opacity_mask_uri, dict, opacity_att, opacity_mask_tag);
651
652 /* If it's a solid color brush fill/stroke do a simple fill */
653
654 if (fz_xml_is_tag(fill_tag, "SolidColorBrush"))
655 {
656 fill_opacity_att = fz_xml_att(fill_tag, "Opacity");
657 fill_att = fz_xml_att(fill_tag, "Color");
658 fill_tag = NULL;
659 }
660
661 if (fill_att)
662 {
663 float samples[FZ_MAX_COLORS];
664 fz_colorspace *colorspace;
665
666 xps_parse_color(ctx, doc, base_uri, fill_att, &colorspace, samples);
667 if (fill_opacity_att)
668 samples[0] *= fz_atof(fill_opacity_att);
669 xps_set_color(ctx, doc, colorspace, samples);
670
671 fz_fill_text(ctx, dev, text, ctm, doc->colorspace, doc->color, doc->alpha, fz_default_color_params);
672 }
673
674 /* If it's a complex brush, use the charpath as a clip mask */
675
676 if (fill_tag)
677 {
678 fz_clip_text(ctx, dev, text, ctm, area);
679 xps_parse_brush(ctx, doc, ctm, area, fill_uri, dict, fill_tag);
680 fz_pop_clip(ctx, dev);
681 }
682
683 xps_end_opacity(ctx, doc, opacity_mask_uri, dict, opacity_att, opacity_mask_tag);
684
685 if (clip_att || clip_tag)
686 fz_pop_clip(ctx, dev);
687 }
688 fz_always(ctx)
689 {
690 fz_drop_text(ctx, text);
691 fz_drop_font(ctx, font);
692 }
693 fz_catch(ctx)
694 fz_rethrow(ctx);
695 }