comparison mupdf-source/source/pdf/pdf-cmap.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 // Copyright (C) 2004-2021 Artifex Software, Inc.
2 //
3 // This file is part of MuPDF.
4 //
5 // MuPDF is free software: you can redistribute it and/or modify it under the
6 // terms of the GNU Affero General Public License as published by the Free
7 // Software Foundation, either version 3 of the License, or (at your option)
8 // any later version.
9 //
10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13 // details.
14 //
15 // You should have received a copy of the GNU Affero General Public License
16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17 //
18 // Alternative licensing terms are available from the licensor.
19 // For commercial licensing, see <https://www.artifex.com/> or contact
20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21 // CA 94129, USA, for further information.
22
23 #include "mupdf/fitz.h"
24 #include "mupdf/pdf.h"
25
26 #include <assert.h>
27 #include <string.h>
28
29 #undef CHECK_SPLAY
30 #undef DUMP_SPLAY
31
32 /*
33 * Allocate, destroy and simple parameters.
34 */
35
36 void
37 pdf_drop_cmap_imp(fz_context *ctx, fz_storable *cmap_)
38 {
39 pdf_cmap *cmap = (pdf_cmap *)cmap_;
40 pdf_drop_cmap(ctx, cmap->usecmap);
41 fz_free(ctx, cmap->ranges);
42 fz_free(ctx, cmap->xranges);
43 fz_free(ctx, cmap->mranges);
44 fz_free(ctx, cmap->dict);
45 fz_free(ctx, cmap->tree);
46 fz_free(ctx, cmap);
47 }
48
49 pdf_cmap *
50 pdf_new_cmap(fz_context *ctx)
51 {
52 pdf_cmap *cmap = fz_malloc_struct(ctx, pdf_cmap);
53 FZ_INIT_STORABLE(cmap, 1, pdf_drop_cmap_imp);
54 return cmap;
55 }
56
57 /* Could be a macro for speed */
58 pdf_cmap *
59 pdf_keep_cmap(fz_context *ctx, pdf_cmap *cmap)
60 {
61 return fz_keep_storable(ctx, &cmap->storable);
62 }
63
64 /* Could be a macro for speed */
65 void
66 pdf_drop_cmap(fz_context *ctx, pdf_cmap *cmap)
67 {
68 fz_drop_storable(ctx, &cmap->storable);
69 }
70
71 void
72 pdf_set_usecmap(fz_context *ctx, pdf_cmap *cmap, pdf_cmap *usecmap)
73 {
74 int i;
75
76 pdf_drop_cmap(ctx, cmap->usecmap);
77 cmap->usecmap = pdf_keep_cmap(ctx, usecmap);
78
79 if (cmap->codespace_len == 0)
80 {
81 cmap->codespace_len = usecmap->codespace_len;
82 for (i = 0; i < usecmap->codespace_len; i++)
83 cmap->codespace[i] = usecmap->codespace[i];
84 }
85 }
86
87 int
88 pdf_cmap_wmode(fz_context *ctx, pdf_cmap *cmap)
89 {
90 return cmap->wmode;
91 }
92
93 void
94 pdf_set_cmap_wmode(fz_context *ctx, pdf_cmap *cmap, int wmode)
95 {
96 cmap->wmode = wmode;
97 }
98
99 void
100 pdf_add_codespace(fz_context *ctx, pdf_cmap *cmap, unsigned int low, unsigned int high, size_t n)
101 {
102 if (cmap->codespace_len + 1 == nelem(cmap->codespace))
103 {
104 fz_warn(ctx, "assert: too many code space ranges");
105 return;
106 }
107
108 if ((uint32_t)n != n)
109 {
110 fz_warn(ctx, "assert: code space range too large");
111 return;
112 }
113
114 cmap->codespace[cmap->codespace_len].n = (int)n;
115 cmap->codespace[cmap->codespace_len].low = low;
116 cmap->codespace[cmap->codespace_len].high = high;
117 cmap->codespace_len ++;
118 }
119
120 struct cmap_splay {
121 unsigned int low;
122 unsigned int high;
123 unsigned int out;
124 unsigned int left;
125 unsigned int right;
126 unsigned int parent : 31;
127 unsigned int many : 1;
128 };
129
130 #define EMPTY ((unsigned int)0x40000000)
131
132 /*
133 The splaying steps used:
134
135 Case 1: | z x
136 | y D => A y
137 | x C B z
138 | A B C D
139
140 Case 2: | z x
141 | y D => y z
142 | A x A B C D
143 | B C
144
145 Case 3: | y x
146 | x C => A y
147 | A B B C
148 */
149
150 static void
151 move_to_root(cmap_splay *tree, unsigned int x)
152 {
153 if (x == EMPTY)
154 return;
155 do
156 {
157 unsigned int z, zp;
158 unsigned int y = tree[x].parent;
159 if (y == EMPTY)
160 break;
161 z = tree[y].parent;
162 if (z == EMPTY)
163 {
164 /* Case 3 */
165 tree[x].parent = EMPTY;
166 tree[y].parent = x;
167 if (tree[y].left == x)
168 {
169 /* Case 3 */
170 tree[y].left = tree[x].right;
171 if (tree[y].left != EMPTY)
172 tree[tree[y].left].parent = y;
173 tree[x].right = y;
174 }
175 else
176 {
177 /* Case 3 - reflected */
178 assert(tree[y].right == x);
179 tree[y].right = tree[x].left;
180 if (tree[y].right != EMPTY)
181 tree[tree[y].right].parent = y;
182 tree[x].left = y;
183 }
184 break;
185 }
186
187 zp = tree[z].parent;
188 tree[x].parent = zp;
189 if (zp != EMPTY) {
190 if (tree[zp].left == z)
191 tree[zp].left = x;
192 else
193 {
194 assert(tree[zp].right == z);
195 tree[zp].right = x;
196 }
197 }
198 tree[y].parent = x;
199 if (tree[y].left == x)
200 {
201 tree[y].left = tree[x].right;
202 if (tree[y].left != EMPTY)
203 tree[tree[y].left].parent = y;
204 tree[x].right = y;
205 if (tree[z].left == y)
206 {
207 /* Case 1 */
208 tree[z].parent = y;
209 tree[z].left = tree[y].right;
210 if (tree[z].left != EMPTY)
211 tree[tree[z].left].parent = z;
212 tree[y].right = z;
213 }
214 else
215 {
216 /* Case 2 - reflected */
217 assert(tree[z].right == y);
218 tree[z].parent = x;
219 tree[z].right = tree[x].left;
220 if (tree[z].right != EMPTY)
221 tree[tree[z].right].parent = z;
222 tree[x].left = z;
223 }
224 }
225 else
226 {
227 assert(tree[y].right == x);
228 tree[y].right = tree[x].left;
229 if (tree[y].right != EMPTY)
230 tree[tree[y].right].parent = y;
231 tree[x].left = y;
232 if (tree[z].left == y)
233 {
234 /* Case 2 */
235 tree[z].parent = x;
236 tree[z].left = tree[x].right;
237 if (tree[z].left != EMPTY)
238 tree[tree[z].left].parent = z;
239 tree[x].right = z;
240 }
241 else
242 {
243 /* Case 1 - reflected */
244 assert(tree[z].right == y);
245 tree[z].parent = y;
246 tree[z].right = tree[y].left;
247 if (tree[z].right != EMPTY)
248 tree[tree[z].right].parent = z;
249 tree[y].left = z;
250 }
251 }
252 } while (1);
253 }
254
255 static unsigned int delete_node(pdf_cmap *cmap, unsigned int current)
256 {
257 cmap_splay *tree = cmap->tree;
258 unsigned int parent;
259 unsigned int replacement;
260
261 assert(current != EMPTY);
262
263 parent = tree[current].parent;
264 if (tree[current].right == EMPTY)
265 {
266 if (parent == EMPTY)
267 {
268 replacement = cmap->ttop = tree[current].left;
269 }
270 else if (tree[parent].left == current)
271 {
272 replacement = tree[parent].left = tree[current].left;
273 }
274 else
275 {
276 assert(tree[parent].right == current);
277 replacement = tree[parent].right = tree[current].left;
278 }
279 if (replacement != EMPTY)
280 tree[replacement].parent = parent;
281 else
282 replacement = parent;
283 }
284 else if (tree[current].left == EMPTY)
285 {
286 if (parent == EMPTY)
287 {
288 replacement = cmap->ttop = tree[current].right;
289 }
290 else if (tree[parent].left == current)
291 {
292 replacement = tree[parent].left = tree[current].right;
293 }
294 else
295 {
296 assert(tree[parent].right == current);
297 replacement = tree[parent].right = tree[current].right;
298 }
299 if (replacement != EMPTY)
300 tree[replacement].parent = parent;
301 else
302 replacement = parent;
303 }
304 else
305 {
306 /* Hard case, find the in-order predecessor of current */
307 unsigned int amputee = current;
308 replacement = tree[current].left;
309 while (tree[replacement].right != EMPTY) {
310 amputee = replacement;
311 replacement = tree[replacement].right;
312 }
313 /* Remove replacement from the tree */
314 if (amputee == current)
315 {
316 tree[amputee].left = tree[replacement].left;
317 if (tree[amputee].left != EMPTY)
318 tree[tree[amputee].left].parent = amputee;
319 }
320 else
321 {
322 tree[amputee].right = tree[replacement].left;
323 if (tree[amputee].right != EMPTY)
324 tree[tree[amputee].right].parent = amputee;
325 }
326 /* Insert replacement in place of current */
327 tree[replacement].parent = parent;
328 if (parent == EMPTY)
329 {
330 tree[replacement].parent = EMPTY;
331 cmap->ttop = replacement;
332 }
333 else if (tree[parent].left == current)
334 tree[parent].left = replacement;
335 else
336 {
337 assert(tree[parent].right == current);
338 tree[parent].right = replacement;
339 }
340 tree[replacement].left = tree[current].left;
341 if (tree[replacement].left != EMPTY)
342 tree[tree[replacement].left].parent = replacement;
343 tree[replacement].right = tree[current].right;
344 if (tree[replacement].right != EMPTY)
345 tree[tree[replacement].right].parent = replacement;
346 }
347
348 /* current is now unlinked. We need to remove it from our array. */
349 cmap->tlen--;
350 if (current != (unsigned int) cmap->tlen)
351 {
352 if (replacement == (unsigned int) cmap->tlen)
353 replacement = current;
354 tree[current] = tree[cmap->tlen];
355 parent = tree[current].parent;
356 if (parent == EMPTY)
357 cmap->ttop = current;
358 else if (tree[parent].left == (unsigned int) cmap->tlen)
359 tree[parent].left = current;
360 else
361 {
362 assert(tree[parent].right == (unsigned int) cmap->tlen);
363 tree[parent].right = current;
364 }
365 if (tree[current].left != EMPTY)
366 {
367 assert(tree[tree[current].left].parent == (unsigned int) cmap->tlen);
368 tree[tree[current].left].parent = current;
369 }
370 if (tree[current].right != EMPTY)
371 {
372 assert(tree[tree[current].right].parent == (unsigned int) cmap->tlen);
373 tree[tree[current].right].parent = current;
374 }
375 }
376
377 /* Return the node that we should continue searching from */
378 return replacement;
379 }
380
381 #ifdef DUMP_SPLAY
382 static void
383 dump_splay(cmap_splay *tree, unsigned int node, int depth, const char *pre)
384 {
385 int i;
386
387 if (tree == NULL || node == EMPTY)
388 return;
389
390 for (i = 0; i < depth; i++)
391 fprintf(stderr, " ");
392 fprintf(stderr, "%s%d:", pre, node);
393 if (tree[node].parent == EMPTY)
394 fprintf(stderr, "^EMPTY");
395 else
396 fprintf(stderr, "^%d", tree[node].parent);
397 if (tree[node].left == EMPTY)
398 fprintf(stderr, "<EMPTY");
399 else
400 fprintf(stderr, "<%d", tree[node].left);
401 if (tree[node].right == EMPTY)
402 fprintf(stderr, ">EMPTY");
403 else
404 fprintf(stderr, ">%d", tree[node].right);
405 fprintf(stderr, "(%x,%x,%x,%d)\n", tree[node].low, tree[node].high, tree[node].out, tree[node].many);
406 assert(tree[node].parent == EMPTY || depth);
407 assert(tree[node].left == EMPTY || tree[tree[node].left].parent == node);
408 assert(tree[node].right == EMPTY || tree[tree[node].right].parent == node);
409 dump_splay(tree, tree[node].left, depth+1, "L");
410 dump_splay(tree, tree[node].right, depth+1, "R");
411 }
412 #endif
413
414 enum
415 {
416 TOP = 0,
417 LEFT = 1,
418 RIGHT = 2
419 };
420
421 static void walk_splay(cmap_splay *tree, unsigned int node, void (*fn)(cmap_splay *, void *), void *arg)
422 {
423 int from = TOP;
424
425 while (node != EMPTY)
426 {
427 switch (from)
428 {
429 case TOP:
430 if (tree[node].left != EMPTY)
431 {
432 node = tree[node].left;
433 from = TOP;
434 break;
435 }
436 /* fallthrough */
437 case LEFT:
438 fn(&tree[node], arg);
439 if (tree[node].right != EMPTY)
440 {
441 node = tree[node].right;
442 from = TOP;
443 break;
444 }
445 /* fallthrough */
446 case RIGHT:
447 {
448 unsigned int parent = tree[node].parent;
449 if (parent == EMPTY)
450 return;
451 if (tree[parent].left == node)
452 from = LEFT;
453 else
454 {
455 assert(tree[parent].right == node);
456 from = RIGHT;
457 }
458 node = parent;
459 }
460 }
461 }
462 }
463
464 #ifdef CHECK_SPLAY
465
466 static int
467 tree_has_overlap(cmap_splay *tree, int node, int low, int high)
468 {
469 if (tree[node].left != EMPTY)
470 if (tree_has_overlap(tree, tree[node].left, low, high))
471 return 1;
472 if (tree[node].right != EMPTY)
473 if (tree_has_overlap(tree, tree[node].right, low, high))
474 return 1;
475 return (tree[node].low < low && low < tree[node].high) || (tree[node].low < high && high < tree[node].high);
476 }
477
478 static void
479 do_check(cmap_splay *node, void *arg)
480 {
481 cmap_splay *tree = arg;
482 unsigned int num = node - tree;
483 assert(!node->many || node->low == node->high);
484 assert(node->low <= node->high);
485 assert((node->left == EMPTY) || (tree[node->left].parent == num &&
486 tree[node->left].high < node->low));
487 assert(node->right == EMPTY || (tree[node->right].parent == num &&
488 node->high < tree[node->right].low));
489 assert(!tree_has_overlap(tree, num, node->low, node->high));
490 }
491
492 static void
493 check_splay(cmap_splay *tree, unsigned int node, int depth)
494 {
495 if (node == EMPTY)
496 return;
497 assert(tree[node].parent == EMPTY);
498 walk_splay(tree, node, do_check, tree);
499 }
500 #endif
501
502 /*
503 * Add a range.
504 */
505 static void
506 add_range(fz_context *ctx, pdf_cmap *cmap, unsigned int low, unsigned int high, unsigned int out, int check_for_overlap, int many)
507 {
508 int current;
509 cmap_splay *tree;
510
511 if (low > high)
512 {
513 fz_warn(ctx, "range limits out of range in cmap %s", cmap->cmap_name);
514 return;
515 }
516
517 if (cmap->codespace_len == 0)
518 {
519 fz_warn(ctx, "CMap is missing codespace range");
520 pdf_add_codespace(ctx, cmap, 0, 65535, 2);
521 }
522
523 tree = cmap->tree;
524
525 if (cmap->tlen)
526 {
527 unsigned int move = cmap->ttop;
528 unsigned int gt = EMPTY;
529 unsigned int lt = EMPTY;
530 if (check_for_overlap)
531 {
532 /* Check for collision with the current node */
533 do
534 {
535 current = move;
536 /* Cases we might meet:
537 * tree[i]: <----->
538 * case 0: <->
539 * case 1: <------->
540 * case 2: <------------->
541 * case 3: <->
542 * case 4: <------->
543 * case 5: <->
544 */
545 if (low <= tree[current].low && tree[current].low <= high)
546 {
547 /* case 1, reduces to case 0 */
548 /* or case 2, deleting the node */
549 tree[current].out += high + 1 - tree[current].low;
550 tree[current].low = high + 1;
551 if (tree[current].low > tree[current].high)
552 {
553 /* update lt/gt references that will be moved/stale after deleting current */
554 if (gt == (unsigned int) cmap->tlen - 1)
555 gt = current;
556 if (lt == (unsigned int) cmap->tlen - 1)
557 lt = current;
558 /* delete_node() moves the element at cmap->tlen-1 into current */
559 move = delete_node(cmap, current);
560 current = EMPTY;
561 continue;
562 }
563 }
564 else if (low <= tree[current].high && tree[current].high <= high)
565 {
566 /* case 4, reduces to case 5 */
567 tree[current].high = low - 1;
568 assert(tree[current].low <= tree[current].high);
569 }
570 else if (tree[current].low < low && high < tree[current].high)
571 {
572 /* case 3, reduces to case 5 */
573 int new_high = tree[current].high;
574 tree[current].high = low-1;
575 add_range(ctx, cmap, high+1, new_high, tree[current].out + high + 1 - tree[current].low, 0, tree[current].many);
576 tree = cmap->tree;
577 }
578 /* Now look for where to move to next (left for case 0, right for case 5) */
579 if (tree[current].low > high) {
580 move = tree[current].left;
581 gt = current;
582 }
583 else
584 {
585 move = tree[current].right;
586 lt = current;
587 }
588 }
589 while (move != EMPTY);
590 }
591 else
592 {
593 do
594 {
595 current = move;
596 if (tree[current].low > high)
597 {
598 move = tree[current].left;
599 gt = current;
600 }
601 else
602 {
603 move = tree[current].right;
604 lt = current;
605 }
606 } while (move != EMPTY);
607 }
608 /* current is now the node to which we would be adding the new node */
609 /* lt is the last node we traversed which is lt the new node. */
610 /* gt is the last node we traversed which is gt the new node. */
611
612 if (!many)
613 {
614 /* Check for the 'merge' cases. */
615 if (lt != EMPTY && !tree[lt].many && tree[lt].high == low-1 && tree[lt].out - tree[lt].low == out - low)
616 {
617 tree[lt].high = high;
618 if (gt != EMPTY && !tree[gt].many && tree[gt].low == high+1 && tree[gt].out - tree[gt].low == out - low)
619 {
620 tree[lt].high = tree[gt].high;
621 delete_node(cmap, gt);
622 }
623 goto exit;
624 }
625 if (gt != EMPTY && !tree[gt].many && tree[gt].low == high+1 && tree[gt].out - tree[gt].low == out - low)
626 {
627 tree[gt].low = low;
628 tree[gt].out = out;
629 goto exit;
630 }
631 }
632 }
633 else
634 current = EMPTY;
635
636 if (cmap->tlen == cmap->tcap)
637 {
638 int new_cap = cmap->tcap ? cmap->tcap * 2 : 256;
639 tree = cmap->tree = fz_realloc_array(ctx, cmap->tree, new_cap, cmap_splay);
640 cmap->tcap = new_cap;
641 }
642 tree[cmap->tlen].low = low;
643 tree[cmap->tlen].high = high;
644 tree[cmap->tlen].out = out;
645 tree[cmap->tlen].parent = current;
646 tree[cmap->tlen].left = EMPTY;
647 tree[cmap->tlen].right = EMPTY;
648 tree[cmap->tlen].many = many;
649 cmap->tlen++;
650 if (current == EMPTY)
651 cmap->ttop = 0;
652 else if (tree[current].low > high)
653 tree[current].left = cmap->tlen-1;
654 else
655 {
656 assert(tree[current].high < low);
657 tree[current].right = cmap->tlen-1;
658 }
659 move_to_root(tree, cmap->tlen-1);
660 cmap->ttop = cmap->tlen-1;
661 exit:
662 {}
663 #ifdef CHECK_SPLAY
664 check_splay(cmap->tree, cmap->ttop, 0);
665 #endif
666 #ifdef DUMP_SPLAY
667 dump_splay(cmap->tree, cmap->ttop, 0, "");
668 #endif
669 }
670
671 /*
672 * Add a one-to-many mapping.
673 */
674 static void
675 add_mrange(fz_context *ctx, pdf_cmap *cmap, unsigned int low, int *out, int len)
676 {
677 int out_pos;
678
679 if (cmap->dlen + len + 1 > cmap->dcap)
680 {
681 int new_cap = cmap->dcap ? cmap->dcap * 2 : 256;
682 cmap->dict = fz_realloc_array(ctx, cmap->dict, new_cap, int);
683 cmap->dcap = new_cap;
684 }
685 out_pos = cmap->dlen;
686 cmap->dict[out_pos] = len;
687 memcpy(&cmap->dict[out_pos+1], out, sizeof(int)*len);
688 cmap->dlen += len + 1;
689
690 add_range(ctx, cmap, low, low, out_pos, 1, 1);
691 }
692
693 void
694 pdf_map_range_to_range(fz_context *ctx, pdf_cmap *cmap, unsigned int low, unsigned int high, int out)
695 {
696 add_range(ctx, cmap, low, high, out, 1, 0);
697 }
698
699 void
700 pdf_map_one_to_many(fz_context *ctx, pdf_cmap *cmap, unsigned int low, int *values, size_t len)
701 {
702 int *ovalues = values;
703 /* len is always restricted to <= 256 by the callers. */
704 int local[256];
705
706 assert(len <= 256);
707
708 /* Decode unicode surrogate pairs. */
709 /* Only the *-UCS2 CMaps use one-to-many mappings, so assuming unicode should be safe. */
710 if (len >= 2)
711 {
712 size_t i, j;
713 /* Look for mranges with either multiple surrogate pairs in, or surrogate pairs
714 * with other chars. See bug 706131. */
715 for (i = 0, j = 0; i < len; i++, j++)
716 {
717 int hi = ovalues[i];
718 if (hi >= 0xd800 && hi < 0xdc00 && i < len-1)
719 {
720 int lo = ovalues[i+1];
721 if (lo >= 0xdc00 && lo < 0xe000)
722 {
723 hi = ((hi - 0xD800) << 10) + (lo - 0xDC00) + 0x10000;
724 i++;
725 }
726 }
727 if (values != local)
728 {
729 /* We can't change the callers data, so copy stuff in. */
730 if (j)
731 memcpy(local, values, sizeof(local[0]) * (j-1));
732 values = local;
733 }
734 values[j] = hi;
735 }
736 len = j;
737 }
738
739 if (len == 1)
740 {
741 add_range(ctx, cmap, low, low, values[0], 1, 0);
742 return;
743 }
744
745 if (len > PDF_MRANGE_CAP)
746 {
747 fz_warn(ctx, "ignoring one to many mapping in cmap %s", cmap->cmap_name);
748 return;
749 }
750
751 add_mrange(ctx, cmap, low, values, (int)len);
752 }
753
754 static void
755 count_node_types(cmap_splay *node, void *arg)
756 {
757 int *counts = (int *)arg;
758
759 if (node->many)
760 counts[2]++;
761 else if (node->low <= 0xffff && node->high <= 0xFFFF && node->out <= 0xFFFF)
762 counts[0]++;
763 else
764 counts[1]++;
765 }
766
767 static void
768 copy_node_types(cmap_splay *node, void *arg)
769 {
770 pdf_cmap *cmap = (pdf_cmap *)arg;
771
772 if (node->many)
773 {
774 assert(node->low == node->high);
775 cmap->mranges[cmap->mlen].low = node->low;
776 cmap->mranges[cmap->mlen].out = node->out;
777 cmap->mlen++;
778 }
779 else if (node->low <= 0xffff && node->high <= 0xFFFF && node->out <= 0xFFFF)
780 {
781 cmap->ranges[cmap->rlen].low = node->low;
782 cmap->ranges[cmap->rlen].high = node->high;
783 cmap->ranges[cmap->rlen].out = node->out;
784 cmap->rlen++;
785 }
786 else
787 {
788 cmap->xranges[cmap->xlen].low = node->low;
789 cmap->xranges[cmap->xlen].high = node->high;
790 cmap->xranges[cmap->xlen].out = node->out;
791 cmap->xlen++;
792 }
793 }
794
795 void
796 pdf_sort_cmap(fz_context *ctx, pdf_cmap *cmap)
797 {
798 int counts[3];
799
800 if (cmap->tree == NULL)
801 return;
802
803 counts[0] = 0;
804 counts[1] = 0;
805 counts[2] = 0;
806 walk_splay(cmap->tree, cmap->ttop, count_node_types, &counts);
807
808 cmap->ranges = Memento_label(fz_malloc_array(ctx, counts[0], pdf_range), "cmap_range");
809 cmap->rcap = counts[0];
810 cmap->xranges = Memento_label(fz_malloc_array(ctx, counts[1], pdf_xrange), "cmap_xrange");
811 cmap->xcap = counts[1];
812 cmap->mranges = Memento_label(fz_malloc_array(ctx, counts[2], pdf_mrange), "cmap_mrange");
813 cmap->mcap = counts[2];
814
815 walk_splay(cmap->tree, cmap->ttop, copy_node_types, cmap);
816
817 fz_free(ctx, cmap->tree);
818 cmap->tree = NULL;
819 }
820
821 int
822 pdf_lookup_cmap(pdf_cmap *cmap, unsigned int cpt)
823 {
824 pdf_range *ranges = cmap->ranges;
825 pdf_xrange *xranges = cmap->xranges;
826 int l, r, m;
827
828 l = 0;
829 r = cmap->rlen - 1;
830 while (l <= r)
831 {
832 m = (l + r) >> 1;
833 if (cpt < ranges[m].low)
834 r = m - 1;
835 else if (cpt > ranges[m].high)
836 l = m + 1;
837 else
838 return cpt - ranges[m].low + ranges[m].out;
839 }
840
841 l = 0;
842 r = cmap->xlen - 1;
843 while (l <= r)
844 {
845 m = (l + r) >> 1;
846 if (cpt < xranges[m].low)
847 r = m - 1;
848 else if (cpt > xranges[m].high)
849 l = m + 1;
850 else
851 return cpt - xranges[m].low + xranges[m].out;
852 }
853
854 if (cmap->usecmap)
855 return pdf_lookup_cmap(cmap->usecmap, cpt);
856
857 return -1;
858 }
859
860 int
861 pdf_lookup_cmap_full(pdf_cmap *cmap, unsigned int cpt, int *out)
862 {
863 pdf_range *ranges = cmap->ranges;
864 pdf_xrange *xranges = cmap->xranges;
865 pdf_mrange *mranges = cmap->mranges;
866 unsigned int i;
867 int l, r, m;
868
869 l = 0;
870 r = cmap->rlen - 1;
871 while (l <= r)
872 {
873 m = (l + r) >> 1;
874 if (cpt < ranges[m].low)
875 r = m - 1;
876 else if (cpt > ranges[m].high)
877 l = m + 1;
878 else
879 {
880 out[0] = cpt - ranges[m].low + ranges[m].out;
881 return 1;
882 }
883 }
884
885 l = 0;
886 r = cmap->xlen - 1;
887 while (l <= r)
888 {
889 m = (l + r) >> 1;
890 if (cpt < xranges[m].low)
891 r = m - 1;
892 else if (cpt > xranges[m].high)
893 l = m + 1;
894 else
895 {
896 out[0] = cpt - xranges[m].low + xranges[m].out;
897 return 1;
898 }
899 }
900
901 l = 0;
902 r = cmap->mlen - 1;
903 while (l <= r)
904 {
905 m = (l + r) >> 1;
906 if (cpt < mranges[m].low)
907 r = m - 1;
908 else if (cpt > mranges[m].low)
909 l = m + 1;
910 else
911 {
912 int *ptr = &cmap->dict[cmap->mranges[m].out];
913 unsigned int len = (unsigned int)*ptr++;
914 for (i = 0; i < len; ++i)
915 out[i] = *ptr++;
916 return len;
917 }
918 }
919
920 if (cmap->usecmap)
921 return pdf_lookup_cmap_full(cmap->usecmap, cpt, out);
922
923 return 0;
924 }
925
926 int
927 pdf_decode_cmap(pdf_cmap *cmap, unsigned char *buf, unsigned char *end, unsigned int *cpt)
928 {
929 unsigned int c;
930 int k, n;
931 int len = end - buf;
932
933 if (len > 4)
934 len = 4;
935
936 c = 0;
937 for (n = 0; n < len; n++)
938 {
939 c = (c << 8) | buf[n];
940 for (k = 0; k < cmap->codespace_len; k++)
941 {
942 if (cmap->codespace[k].n == n + 1)
943 {
944 if (c >= cmap->codespace[k].low && c <= cmap->codespace[k].high)
945 {
946 *cpt = c;
947 return n + 1;
948 }
949 }
950 }
951 }
952
953 *cpt = 0;
954 return 1;
955 }
956
957 size_t
958 pdf_cmap_size(fz_context *ctx, pdf_cmap *cmap)
959 {
960 if (cmap == NULL)
961 return 0;
962 if (cmap->storable.refs < 0)
963 return 0;
964
965 return pdf_cmap_size(ctx, cmap->usecmap) +
966 cmap->rcap * sizeof *cmap->ranges +
967 cmap->xcap * sizeof *cmap->xranges +
968 cmap->mcap * sizeof *cmap->mranges +
969 cmap->tcap * sizeof *cmap->tree +
970 sizeof(*cmap);
971 }