Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/source/pdf/pdf-cmap.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 // Copyright (C) 2004-2021 Artifex Software, Inc. | |
| 2 // | |
| 3 // This file is part of MuPDF. | |
| 4 // | |
| 5 // MuPDF is free software: you can redistribute it and/or modify it under the | |
| 6 // terms of the GNU Affero General Public License as published by the Free | |
| 7 // Software Foundation, either version 3 of the License, or (at your option) | |
| 8 // any later version. | |
| 9 // | |
| 10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY | |
| 11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
| 12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more | |
| 13 // details. | |
| 14 // | |
| 15 // You should have received a copy of the GNU Affero General Public License | |
| 16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> | |
| 17 // | |
| 18 // Alternative licensing terms are available from the licensor. | |
| 19 // For commercial licensing, see <https://www.artifex.com/> or contact | |
| 20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, | |
| 21 // CA 94129, USA, for further information. | |
| 22 | |
| 23 #include "mupdf/fitz.h" | |
| 24 #include "mupdf/pdf.h" | |
| 25 | |
| 26 #include <assert.h> | |
| 27 #include <string.h> | |
| 28 | |
| 29 #undef CHECK_SPLAY | |
| 30 #undef DUMP_SPLAY | |
| 31 | |
| 32 /* | |
| 33 * Allocate, destroy and simple parameters. | |
| 34 */ | |
| 35 | |
| 36 void | |
| 37 pdf_drop_cmap_imp(fz_context *ctx, fz_storable *cmap_) | |
| 38 { | |
| 39 pdf_cmap *cmap = (pdf_cmap *)cmap_; | |
| 40 pdf_drop_cmap(ctx, cmap->usecmap); | |
| 41 fz_free(ctx, cmap->ranges); | |
| 42 fz_free(ctx, cmap->xranges); | |
| 43 fz_free(ctx, cmap->mranges); | |
| 44 fz_free(ctx, cmap->dict); | |
| 45 fz_free(ctx, cmap->tree); | |
| 46 fz_free(ctx, cmap); | |
| 47 } | |
| 48 | |
| 49 pdf_cmap * | |
| 50 pdf_new_cmap(fz_context *ctx) | |
| 51 { | |
| 52 pdf_cmap *cmap = fz_malloc_struct(ctx, pdf_cmap); | |
| 53 FZ_INIT_STORABLE(cmap, 1, pdf_drop_cmap_imp); | |
| 54 return cmap; | |
| 55 } | |
| 56 | |
| 57 /* Could be a macro for speed */ | |
| 58 pdf_cmap * | |
| 59 pdf_keep_cmap(fz_context *ctx, pdf_cmap *cmap) | |
| 60 { | |
| 61 return fz_keep_storable(ctx, &cmap->storable); | |
| 62 } | |
| 63 | |
| 64 /* Could be a macro for speed */ | |
| 65 void | |
| 66 pdf_drop_cmap(fz_context *ctx, pdf_cmap *cmap) | |
| 67 { | |
| 68 fz_drop_storable(ctx, &cmap->storable); | |
| 69 } | |
| 70 | |
| 71 void | |
| 72 pdf_set_usecmap(fz_context *ctx, pdf_cmap *cmap, pdf_cmap *usecmap) | |
| 73 { | |
| 74 int i; | |
| 75 | |
| 76 pdf_drop_cmap(ctx, cmap->usecmap); | |
| 77 cmap->usecmap = pdf_keep_cmap(ctx, usecmap); | |
| 78 | |
| 79 if (cmap->codespace_len == 0) | |
| 80 { | |
| 81 cmap->codespace_len = usecmap->codespace_len; | |
| 82 for (i = 0; i < usecmap->codespace_len; i++) | |
| 83 cmap->codespace[i] = usecmap->codespace[i]; | |
| 84 } | |
| 85 } | |
| 86 | |
| 87 int | |
| 88 pdf_cmap_wmode(fz_context *ctx, pdf_cmap *cmap) | |
| 89 { | |
| 90 return cmap->wmode; | |
| 91 } | |
| 92 | |
| 93 void | |
| 94 pdf_set_cmap_wmode(fz_context *ctx, pdf_cmap *cmap, int wmode) | |
| 95 { | |
| 96 cmap->wmode = wmode; | |
| 97 } | |
| 98 | |
| 99 void | |
| 100 pdf_add_codespace(fz_context *ctx, pdf_cmap *cmap, unsigned int low, unsigned int high, size_t n) | |
| 101 { | |
| 102 if (cmap->codespace_len + 1 == nelem(cmap->codespace)) | |
| 103 { | |
| 104 fz_warn(ctx, "assert: too many code space ranges"); | |
| 105 return; | |
| 106 } | |
| 107 | |
| 108 if ((uint32_t)n != n) | |
| 109 { | |
| 110 fz_warn(ctx, "assert: code space range too large"); | |
| 111 return; | |
| 112 } | |
| 113 | |
| 114 cmap->codespace[cmap->codespace_len].n = (int)n; | |
| 115 cmap->codespace[cmap->codespace_len].low = low; | |
| 116 cmap->codespace[cmap->codespace_len].high = high; | |
| 117 cmap->codespace_len ++; | |
| 118 } | |
| 119 | |
| 120 struct cmap_splay { | |
| 121 unsigned int low; | |
| 122 unsigned int high; | |
| 123 unsigned int out; | |
| 124 unsigned int left; | |
| 125 unsigned int right; | |
| 126 unsigned int parent : 31; | |
| 127 unsigned int many : 1; | |
| 128 }; | |
| 129 | |
| 130 #define EMPTY ((unsigned int)0x40000000) | |
| 131 | |
| 132 /* | |
| 133 The splaying steps used: | |
| 134 | |
| 135 Case 1: | z x | |
| 136 | y D => A y | |
| 137 | x C B z | |
| 138 | A B C D | |
| 139 | |
| 140 Case 2: | z x | |
| 141 | y D => y z | |
| 142 | A x A B C D | |
| 143 | B C | |
| 144 | |
| 145 Case 3: | y x | |
| 146 | x C => A y | |
| 147 | A B B C | |
| 148 */ | |
| 149 | |
| 150 static void | |
| 151 move_to_root(cmap_splay *tree, unsigned int x) | |
| 152 { | |
| 153 if (x == EMPTY) | |
| 154 return; | |
| 155 do | |
| 156 { | |
| 157 unsigned int z, zp; | |
| 158 unsigned int y = tree[x].parent; | |
| 159 if (y == EMPTY) | |
| 160 break; | |
| 161 z = tree[y].parent; | |
| 162 if (z == EMPTY) | |
| 163 { | |
| 164 /* Case 3 */ | |
| 165 tree[x].parent = EMPTY; | |
| 166 tree[y].parent = x; | |
| 167 if (tree[y].left == x) | |
| 168 { | |
| 169 /* Case 3 */ | |
| 170 tree[y].left = tree[x].right; | |
| 171 if (tree[y].left != EMPTY) | |
| 172 tree[tree[y].left].parent = y; | |
| 173 tree[x].right = y; | |
| 174 } | |
| 175 else | |
| 176 { | |
| 177 /* Case 3 - reflected */ | |
| 178 assert(tree[y].right == x); | |
| 179 tree[y].right = tree[x].left; | |
| 180 if (tree[y].right != EMPTY) | |
| 181 tree[tree[y].right].parent = y; | |
| 182 tree[x].left = y; | |
| 183 } | |
| 184 break; | |
| 185 } | |
| 186 | |
| 187 zp = tree[z].parent; | |
| 188 tree[x].parent = zp; | |
| 189 if (zp != EMPTY) { | |
| 190 if (tree[zp].left == z) | |
| 191 tree[zp].left = x; | |
| 192 else | |
| 193 { | |
| 194 assert(tree[zp].right == z); | |
| 195 tree[zp].right = x; | |
| 196 } | |
| 197 } | |
| 198 tree[y].parent = x; | |
| 199 if (tree[y].left == x) | |
| 200 { | |
| 201 tree[y].left = tree[x].right; | |
| 202 if (tree[y].left != EMPTY) | |
| 203 tree[tree[y].left].parent = y; | |
| 204 tree[x].right = y; | |
| 205 if (tree[z].left == y) | |
| 206 { | |
| 207 /* Case 1 */ | |
| 208 tree[z].parent = y; | |
| 209 tree[z].left = tree[y].right; | |
| 210 if (tree[z].left != EMPTY) | |
| 211 tree[tree[z].left].parent = z; | |
| 212 tree[y].right = z; | |
| 213 } | |
| 214 else | |
| 215 { | |
| 216 /* Case 2 - reflected */ | |
| 217 assert(tree[z].right == y); | |
| 218 tree[z].parent = x; | |
| 219 tree[z].right = tree[x].left; | |
| 220 if (tree[z].right != EMPTY) | |
| 221 tree[tree[z].right].parent = z; | |
| 222 tree[x].left = z; | |
| 223 } | |
| 224 } | |
| 225 else | |
| 226 { | |
| 227 assert(tree[y].right == x); | |
| 228 tree[y].right = tree[x].left; | |
| 229 if (tree[y].right != EMPTY) | |
| 230 tree[tree[y].right].parent = y; | |
| 231 tree[x].left = y; | |
| 232 if (tree[z].left == y) | |
| 233 { | |
| 234 /* Case 2 */ | |
| 235 tree[z].parent = x; | |
| 236 tree[z].left = tree[x].right; | |
| 237 if (tree[z].left != EMPTY) | |
| 238 tree[tree[z].left].parent = z; | |
| 239 tree[x].right = z; | |
| 240 } | |
| 241 else | |
| 242 { | |
| 243 /* Case 1 - reflected */ | |
| 244 assert(tree[z].right == y); | |
| 245 tree[z].parent = y; | |
| 246 tree[z].right = tree[y].left; | |
| 247 if (tree[z].right != EMPTY) | |
| 248 tree[tree[z].right].parent = z; | |
| 249 tree[y].left = z; | |
| 250 } | |
| 251 } | |
| 252 } while (1); | |
| 253 } | |
| 254 | |
| 255 static unsigned int delete_node(pdf_cmap *cmap, unsigned int current) | |
| 256 { | |
| 257 cmap_splay *tree = cmap->tree; | |
| 258 unsigned int parent; | |
| 259 unsigned int replacement; | |
| 260 | |
| 261 assert(current != EMPTY); | |
| 262 | |
| 263 parent = tree[current].parent; | |
| 264 if (tree[current].right == EMPTY) | |
| 265 { | |
| 266 if (parent == EMPTY) | |
| 267 { | |
| 268 replacement = cmap->ttop = tree[current].left; | |
| 269 } | |
| 270 else if (tree[parent].left == current) | |
| 271 { | |
| 272 replacement = tree[parent].left = tree[current].left; | |
| 273 } | |
| 274 else | |
| 275 { | |
| 276 assert(tree[parent].right == current); | |
| 277 replacement = tree[parent].right = tree[current].left; | |
| 278 } | |
| 279 if (replacement != EMPTY) | |
| 280 tree[replacement].parent = parent; | |
| 281 else | |
| 282 replacement = parent; | |
| 283 } | |
| 284 else if (tree[current].left == EMPTY) | |
| 285 { | |
| 286 if (parent == EMPTY) | |
| 287 { | |
| 288 replacement = cmap->ttop = tree[current].right; | |
| 289 } | |
| 290 else if (tree[parent].left == current) | |
| 291 { | |
| 292 replacement = tree[parent].left = tree[current].right; | |
| 293 } | |
| 294 else | |
| 295 { | |
| 296 assert(tree[parent].right == current); | |
| 297 replacement = tree[parent].right = tree[current].right; | |
| 298 } | |
| 299 if (replacement != EMPTY) | |
| 300 tree[replacement].parent = parent; | |
| 301 else | |
| 302 replacement = parent; | |
| 303 } | |
| 304 else | |
| 305 { | |
| 306 /* Hard case, find the in-order predecessor of current */ | |
| 307 unsigned int amputee = current; | |
| 308 replacement = tree[current].left; | |
| 309 while (tree[replacement].right != EMPTY) { | |
| 310 amputee = replacement; | |
| 311 replacement = tree[replacement].right; | |
| 312 } | |
| 313 /* Remove replacement from the tree */ | |
| 314 if (amputee == current) | |
| 315 { | |
| 316 tree[amputee].left = tree[replacement].left; | |
| 317 if (tree[amputee].left != EMPTY) | |
| 318 tree[tree[amputee].left].parent = amputee; | |
| 319 } | |
| 320 else | |
| 321 { | |
| 322 tree[amputee].right = tree[replacement].left; | |
| 323 if (tree[amputee].right != EMPTY) | |
| 324 tree[tree[amputee].right].parent = amputee; | |
| 325 } | |
| 326 /* Insert replacement in place of current */ | |
| 327 tree[replacement].parent = parent; | |
| 328 if (parent == EMPTY) | |
| 329 { | |
| 330 tree[replacement].parent = EMPTY; | |
| 331 cmap->ttop = replacement; | |
| 332 } | |
| 333 else if (tree[parent].left == current) | |
| 334 tree[parent].left = replacement; | |
| 335 else | |
| 336 { | |
| 337 assert(tree[parent].right == current); | |
| 338 tree[parent].right = replacement; | |
| 339 } | |
| 340 tree[replacement].left = tree[current].left; | |
| 341 if (tree[replacement].left != EMPTY) | |
| 342 tree[tree[replacement].left].parent = replacement; | |
| 343 tree[replacement].right = tree[current].right; | |
| 344 if (tree[replacement].right != EMPTY) | |
| 345 tree[tree[replacement].right].parent = replacement; | |
| 346 } | |
| 347 | |
| 348 /* current is now unlinked. We need to remove it from our array. */ | |
| 349 cmap->tlen--; | |
| 350 if (current != (unsigned int) cmap->tlen) | |
| 351 { | |
| 352 if (replacement == (unsigned int) cmap->tlen) | |
| 353 replacement = current; | |
| 354 tree[current] = tree[cmap->tlen]; | |
| 355 parent = tree[current].parent; | |
| 356 if (parent == EMPTY) | |
| 357 cmap->ttop = current; | |
| 358 else if (tree[parent].left == (unsigned int) cmap->tlen) | |
| 359 tree[parent].left = current; | |
| 360 else | |
| 361 { | |
| 362 assert(tree[parent].right == (unsigned int) cmap->tlen); | |
| 363 tree[parent].right = current; | |
| 364 } | |
| 365 if (tree[current].left != EMPTY) | |
| 366 { | |
| 367 assert(tree[tree[current].left].parent == (unsigned int) cmap->tlen); | |
| 368 tree[tree[current].left].parent = current; | |
| 369 } | |
| 370 if (tree[current].right != EMPTY) | |
| 371 { | |
| 372 assert(tree[tree[current].right].parent == (unsigned int) cmap->tlen); | |
| 373 tree[tree[current].right].parent = current; | |
| 374 } | |
| 375 } | |
| 376 | |
| 377 /* Return the node that we should continue searching from */ | |
| 378 return replacement; | |
| 379 } | |
| 380 | |
| 381 #ifdef DUMP_SPLAY | |
| 382 static void | |
| 383 dump_splay(cmap_splay *tree, unsigned int node, int depth, const char *pre) | |
| 384 { | |
| 385 int i; | |
| 386 | |
| 387 if (tree == NULL || node == EMPTY) | |
| 388 return; | |
| 389 | |
| 390 for (i = 0; i < depth; i++) | |
| 391 fprintf(stderr, " "); | |
| 392 fprintf(stderr, "%s%d:", pre, node); | |
| 393 if (tree[node].parent == EMPTY) | |
| 394 fprintf(stderr, "^EMPTY"); | |
| 395 else | |
| 396 fprintf(stderr, "^%d", tree[node].parent); | |
| 397 if (tree[node].left == EMPTY) | |
| 398 fprintf(stderr, "<EMPTY"); | |
| 399 else | |
| 400 fprintf(stderr, "<%d", tree[node].left); | |
| 401 if (tree[node].right == EMPTY) | |
| 402 fprintf(stderr, ">EMPTY"); | |
| 403 else | |
| 404 fprintf(stderr, ">%d", tree[node].right); | |
| 405 fprintf(stderr, "(%x,%x,%x,%d)\n", tree[node].low, tree[node].high, tree[node].out, tree[node].many); | |
| 406 assert(tree[node].parent == EMPTY || depth); | |
| 407 assert(tree[node].left == EMPTY || tree[tree[node].left].parent == node); | |
| 408 assert(tree[node].right == EMPTY || tree[tree[node].right].parent == node); | |
| 409 dump_splay(tree, tree[node].left, depth+1, "L"); | |
| 410 dump_splay(tree, tree[node].right, depth+1, "R"); | |
| 411 } | |
| 412 #endif | |
| 413 | |
| 414 enum | |
| 415 { | |
| 416 TOP = 0, | |
| 417 LEFT = 1, | |
| 418 RIGHT = 2 | |
| 419 }; | |
| 420 | |
| 421 static void walk_splay(cmap_splay *tree, unsigned int node, void (*fn)(cmap_splay *, void *), void *arg) | |
| 422 { | |
| 423 int from = TOP; | |
| 424 | |
| 425 while (node != EMPTY) | |
| 426 { | |
| 427 switch (from) | |
| 428 { | |
| 429 case TOP: | |
| 430 if (tree[node].left != EMPTY) | |
| 431 { | |
| 432 node = tree[node].left; | |
| 433 from = TOP; | |
| 434 break; | |
| 435 } | |
| 436 /* fallthrough */ | |
| 437 case LEFT: | |
| 438 fn(&tree[node], arg); | |
| 439 if (tree[node].right != EMPTY) | |
| 440 { | |
| 441 node = tree[node].right; | |
| 442 from = TOP; | |
| 443 break; | |
| 444 } | |
| 445 /* fallthrough */ | |
| 446 case RIGHT: | |
| 447 { | |
| 448 unsigned int parent = tree[node].parent; | |
| 449 if (parent == EMPTY) | |
| 450 return; | |
| 451 if (tree[parent].left == node) | |
| 452 from = LEFT; | |
| 453 else | |
| 454 { | |
| 455 assert(tree[parent].right == node); | |
| 456 from = RIGHT; | |
| 457 } | |
| 458 node = parent; | |
| 459 } | |
| 460 } | |
| 461 } | |
| 462 } | |
| 463 | |
| 464 #ifdef CHECK_SPLAY | |
| 465 | |
| 466 static int | |
| 467 tree_has_overlap(cmap_splay *tree, int node, int low, int high) | |
| 468 { | |
| 469 if (tree[node].left != EMPTY) | |
| 470 if (tree_has_overlap(tree, tree[node].left, low, high)) | |
| 471 return 1; | |
| 472 if (tree[node].right != EMPTY) | |
| 473 if (tree_has_overlap(tree, tree[node].right, low, high)) | |
| 474 return 1; | |
| 475 return (tree[node].low < low && low < tree[node].high) || (tree[node].low < high && high < tree[node].high); | |
| 476 } | |
| 477 | |
| 478 static void | |
| 479 do_check(cmap_splay *node, void *arg) | |
| 480 { | |
| 481 cmap_splay *tree = arg; | |
| 482 unsigned int num = node - tree; | |
| 483 assert(!node->many || node->low == node->high); | |
| 484 assert(node->low <= node->high); | |
| 485 assert((node->left == EMPTY) || (tree[node->left].parent == num && | |
| 486 tree[node->left].high < node->low)); | |
| 487 assert(node->right == EMPTY || (tree[node->right].parent == num && | |
| 488 node->high < tree[node->right].low)); | |
| 489 assert(!tree_has_overlap(tree, num, node->low, node->high)); | |
| 490 } | |
| 491 | |
| 492 static void | |
| 493 check_splay(cmap_splay *tree, unsigned int node, int depth) | |
| 494 { | |
| 495 if (node == EMPTY) | |
| 496 return; | |
| 497 assert(tree[node].parent == EMPTY); | |
| 498 walk_splay(tree, node, do_check, tree); | |
| 499 } | |
| 500 #endif | |
| 501 | |
| 502 /* | |
| 503 * Add a range. | |
| 504 */ | |
| 505 static void | |
| 506 add_range(fz_context *ctx, pdf_cmap *cmap, unsigned int low, unsigned int high, unsigned int out, int check_for_overlap, int many) | |
| 507 { | |
| 508 int current; | |
| 509 cmap_splay *tree; | |
| 510 | |
| 511 if (low > high) | |
| 512 { | |
| 513 fz_warn(ctx, "range limits out of range in cmap %s", cmap->cmap_name); | |
| 514 return; | |
| 515 } | |
| 516 | |
| 517 if (cmap->codespace_len == 0) | |
| 518 { | |
| 519 fz_warn(ctx, "CMap is missing codespace range"); | |
| 520 pdf_add_codespace(ctx, cmap, 0, 65535, 2); | |
| 521 } | |
| 522 | |
| 523 tree = cmap->tree; | |
| 524 | |
| 525 if (cmap->tlen) | |
| 526 { | |
| 527 unsigned int move = cmap->ttop; | |
| 528 unsigned int gt = EMPTY; | |
| 529 unsigned int lt = EMPTY; | |
| 530 if (check_for_overlap) | |
| 531 { | |
| 532 /* Check for collision with the current node */ | |
| 533 do | |
| 534 { | |
| 535 current = move; | |
| 536 /* Cases we might meet: | |
| 537 * tree[i]: <-----> | |
| 538 * case 0: <-> | |
| 539 * case 1: <-------> | |
| 540 * case 2: <-------------> | |
| 541 * case 3: <-> | |
| 542 * case 4: <-------> | |
| 543 * case 5: <-> | |
| 544 */ | |
| 545 if (low <= tree[current].low && tree[current].low <= high) | |
| 546 { | |
| 547 /* case 1, reduces to case 0 */ | |
| 548 /* or case 2, deleting the node */ | |
| 549 tree[current].out += high + 1 - tree[current].low; | |
| 550 tree[current].low = high + 1; | |
| 551 if (tree[current].low > tree[current].high) | |
| 552 { | |
| 553 /* update lt/gt references that will be moved/stale after deleting current */ | |
| 554 if (gt == (unsigned int) cmap->tlen - 1) | |
| 555 gt = current; | |
| 556 if (lt == (unsigned int) cmap->tlen - 1) | |
| 557 lt = current; | |
| 558 /* delete_node() moves the element at cmap->tlen-1 into current */ | |
| 559 move = delete_node(cmap, current); | |
| 560 current = EMPTY; | |
| 561 continue; | |
| 562 } | |
| 563 } | |
| 564 else if (low <= tree[current].high && tree[current].high <= high) | |
| 565 { | |
| 566 /* case 4, reduces to case 5 */ | |
| 567 tree[current].high = low - 1; | |
| 568 assert(tree[current].low <= tree[current].high); | |
| 569 } | |
| 570 else if (tree[current].low < low && high < tree[current].high) | |
| 571 { | |
| 572 /* case 3, reduces to case 5 */ | |
| 573 int new_high = tree[current].high; | |
| 574 tree[current].high = low-1; | |
| 575 add_range(ctx, cmap, high+1, new_high, tree[current].out + high + 1 - tree[current].low, 0, tree[current].many); | |
| 576 tree = cmap->tree; | |
| 577 } | |
| 578 /* Now look for where to move to next (left for case 0, right for case 5) */ | |
| 579 if (tree[current].low > high) { | |
| 580 move = tree[current].left; | |
| 581 gt = current; | |
| 582 } | |
| 583 else | |
| 584 { | |
| 585 move = tree[current].right; | |
| 586 lt = current; | |
| 587 } | |
| 588 } | |
| 589 while (move != EMPTY); | |
| 590 } | |
| 591 else | |
| 592 { | |
| 593 do | |
| 594 { | |
| 595 current = move; | |
| 596 if (tree[current].low > high) | |
| 597 { | |
| 598 move = tree[current].left; | |
| 599 gt = current; | |
| 600 } | |
| 601 else | |
| 602 { | |
| 603 move = tree[current].right; | |
| 604 lt = current; | |
| 605 } | |
| 606 } while (move != EMPTY); | |
| 607 } | |
| 608 /* current is now the node to which we would be adding the new node */ | |
| 609 /* lt is the last node we traversed which is lt the new node. */ | |
| 610 /* gt is the last node we traversed which is gt the new node. */ | |
| 611 | |
| 612 if (!many) | |
| 613 { | |
| 614 /* Check for the 'merge' cases. */ | |
| 615 if (lt != EMPTY && !tree[lt].many && tree[lt].high == low-1 && tree[lt].out - tree[lt].low == out - low) | |
| 616 { | |
| 617 tree[lt].high = high; | |
| 618 if (gt != EMPTY && !tree[gt].many && tree[gt].low == high+1 && tree[gt].out - tree[gt].low == out - low) | |
| 619 { | |
| 620 tree[lt].high = tree[gt].high; | |
| 621 delete_node(cmap, gt); | |
| 622 } | |
| 623 goto exit; | |
| 624 } | |
| 625 if (gt != EMPTY && !tree[gt].many && tree[gt].low == high+1 && tree[gt].out - tree[gt].low == out - low) | |
| 626 { | |
| 627 tree[gt].low = low; | |
| 628 tree[gt].out = out; | |
| 629 goto exit; | |
| 630 } | |
| 631 } | |
| 632 } | |
| 633 else | |
| 634 current = EMPTY; | |
| 635 | |
| 636 if (cmap->tlen == cmap->tcap) | |
| 637 { | |
| 638 int new_cap = cmap->tcap ? cmap->tcap * 2 : 256; | |
| 639 tree = cmap->tree = fz_realloc_array(ctx, cmap->tree, new_cap, cmap_splay); | |
| 640 cmap->tcap = new_cap; | |
| 641 } | |
| 642 tree[cmap->tlen].low = low; | |
| 643 tree[cmap->tlen].high = high; | |
| 644 tree[cmap->tlen].out = out; | |
| 645 tree[cmap->tlen].parent = current; | |
| 646 tree[cmap->tlen].left = EMPTY; | |
| 647 tree[cmap->tlen].right = EMPTY; | |
| 648 tree[cmap->tlen].many = many; | |
| 649 cmap->tlen++; | |
| 650 if (current == EMPTY) | |
| 651 cmap->ttop = 0; | |
| 652 else if (tree[current].low > high) | |
| 653 tree[current].left = cmap->tlen-1; | |
| 654 else | |
| 655 { | |
| 656 assert(tree[current].high < low); | |
| 657 tree[current].right = cmap->tlen-1; | |
| 658 } | |
| 659 move_to_root(tree, cmap->tlen-1); | |
| 660 cmap->ttop = cmap->tlen-1; | |
| 661 exit: | |
| 662 {} | |
| 663 #ifdef CHECK_SPLAY | |
| 664 check_splay(cmap->tree, cmap->ttop, 0); | |
| 665 #endif | |
| 666 #ifdef DUMP_SPLAY | |
| 667 dump_splay(cmap->tree, cmap->ttop, 0, ""); | |
| 668 #endif | |
| 669 } | |
| 670 | |
| 671 /* | |
| 672 * Add a one-to-many mapping. | |
| 673 */ | |
| 674 static void | |
| 675 add_mrange(fz_context *ctx, pdf_cmap *cmap, unsigned int low, int *out, int len) | |
| 676 { | |
| 677 int out_pos; | |
| 678 | |
| 679 if (cmap->dlen + len + 1 > cmap->dcap) | |
| 680 { | |
| 681 int new_cap = cmap->dcap ? cmap->dcap * 2 : 256; | |
| 682 cmap->dict = fz_realloc_array(ctx, cmap->dict, new_cap, int); | |
| 683 cmap->dcap = new_cap; | |
| 684 } | |
| 685 out_pos = cmap->dlen; | |
| 686 cmap->dict[out_pos] = len; | |
| 687 memcpy(&cmap->dict[out_pos+1], out, sizeof(int)*len); | |
| 688 cmap->dlen += len + 1; | |
| 689 | |
| 690 add_range(ctx, cmap, low, low, out_pos, 1, 1); | |
| 691 } | |
| 692 | |
| 693 void | |
| 694 pdf_map_range_to_range(fz_context *ctx, pdf_cmap *cmap, unsigned int low, unsigned int high, int out) | |
| 695 { | |
| 696 add_range(ctx, cmap, low, high, out, 1, 0); | |
| 697 } | |
| 698 | |
| 699 void | |
| 700 pdf_map_one_to_many(fz_context *ctx, pdf_cmap *cmap, unsigned int low, int *values, size_t len) | |
| 701 { | |
| 702 int *ovalues = values; | |
| 703 /* len is always restricted to <= 256 by the callers. */ | |
| 704 int local[256]; | |
| 705 | |
| 706 assert(len <= 256); | |
| 707 | |
| 708 /* Decode unicode surrogate pairs. */ | |
| 709 /* Only the *-UCS2 CMaps use one-to-many mappings, so assuming unicode should be safe. */ | |
| 710 if (len >= 2) | |
| 711 { | |
| 712 size_t i, j; | |
| 713 /* Look for mranges with either multiple surrogate pairs in, or surrogate pairs | |
| 714 * with other chars. See bug 706131. */ | |
| 715 for (i = 0, j = 0; i < len; i++, j++) | |
| 716 { | |
| 717 int hi = ovalues[i]; | |
| 718 if (hi >= 0xd800 && hi < 0xdc00 && i < len-1) | |
| 719 { | |
| 720 int lo = ovalues[i+1]; | |
| 721 if (lo >= 0xdc00 && lo < 0xe000) | |
| 722 { | |
| 723 hi = ((hi - 0xD800) << 10) + (lo - 0xDC00) + 0x10000; | |
| 724 i++; | |
| 725 } | |
| 726 } | |
| 727 if (values != local) | |
| 728 { | |
| 729 /* We can't change the callers data, so copy stuff in. */ | |
| 730 if (j) | |
| 731 memcpy(local, values, sizeof(local[0]) * (j-1)); | |
| 732 values = local; | |
| 733 } | |
| 734 values[j] = hi; | |
| 735 } | |
| 736 len = j; | |
| 737 } | |
| 738 | |
| 739 if (len == 1) | |
| 740 { | |
| 741 add_range(ctx, cmap, low, low, values[0], 1, 0); | |
| 742 return; | |
| 743 } | |
| 744 | |
| 745 if (len > PDF_MRANGE_CAP) | |
| 746 { | |
| 747 fz_warn(ctx, "ignoring one to many mapping in cmap %s", cmap->cmap_name); | |
| 748 return; | |
| 749 } | |
| 750 | |
| 751 add_mrange(ctx, cmap, low, values, (int)len); | |
| 752 } | |
| 753 | |
| 754 static void | |
| 755 count_node_types(cmap_splay *node, void *arg) | |
| 756 { | |
| 757 int *counts = (int *)arg; | |
| 758 | |
| 759 if (node->many) | |
| 760 counts[2]++; | |
| 761 else if (node->low <= 0xffff && node->high <= 0xFFFF && node->out <= 0xFFFF) | |
| 762 counts[0]++; | |
| 763 else | |
| 764 counts[1]++; | |
| 765 } | |
| 766 | |
| 767 static void | |
| 768 copy_node_types(cmap_splay *node, void *arg) | |
| 769 { | |
| 770 pdf_cmap *cmap = (pdf_cmap *)arg; | |
| 771 | |
| 772 if (node->many) | |
| 773 { | |
| 774 assert(node->low == node->high); | |
| 775 cmap->mranges[cmap->mlen].low = node->low; | |
| 776 cmap->mranges[cmap->mlen].out = node->out; | |
| 777 cmap->mlen++; | |
| 778 } | |
| 779 else if (node->low <= 0xffff && node->high <= 0xFFFF && node->out <= 0xFFFF) | |
| 780 { | |
| 781 cmap->ranges[cmap->rlen].low = node->low; | |
| 782 cmap->ranges[cmap->rlen].high = node->high; | |
| 783 cmap->ranges[cmap->rlen].out = node->out; | |
| 784 cmap->rlen++; | |
| 785 } | |
| 786 else | |
| 787 { | |
| 788 cmap->xranges[cmap->xlen].low = node->low; | |
| 789 cmap->xranges[cmap->xlen].high = node->high; | |
| 790 cmap->xranges[cmap->xlen].out = node->out; | |
| 791 cmap->xlen++; | |
| 792 } | |
| 793 } | |
| 794 | |
| 795 void | |
| 796 pdf_sort_cmap(fz_context *ctx, pdf_cmap *cmap) | |
| 797 { | |
| 798 int counts[3]; | |
| 799 | |
| 800 if (cmap->tree == NULL) | |
| 801 return; | |
| 802 | |
| 803 counts[0] = 0; | |
| 804 counts[1] = 0; | |
| 805 counts[2] = 0; | |
| 806 walk_splay(cmap->tree, cmap->ttop, count_node_types, &counts); | |
| 807 | |
| 808 cmap->ranges = Memento_label(fz_malloc_array(ctx, counts[0], pdf_range), "cmap_range"); | |
| 809 cmap->rcap = counts[0]; | |
| 810 cmap->xranges = Memento_label(fz_malloc_array(ctx, counts[1], pdf_xrange), "cmap_xrange"); | |
| 811 cmap->xcap = counts[1]; | |
| 812 cmap->mranges = Memento_label(fz_malloc_array(ctx, counts[2], pdf_mrange), "cmap_mrange"); | |
| 813 cmap->mcap = counts[2]; | |
| 814 | |
| 815 walk_splay(cmap->tree, cmap->ttop, copy_node_types, cmap); | |
| 816 | |
| 817 fz_free(ctx, cmap->tree); | |
| 818 cmap->tree = NULL; | |
| 819 } | |
| 820 | |
| 821 int | |
| 822 pdf_lookup_cmap(pdf_cmap *cmap, unsigned int cpt) | |
| 823 { | |
| 824 pdf_range *ranges = cmap->ranges; | |
| 825 pdf_xrange *xranges = cmap->xranges; | |
| 826 int l, r, m; | |
| 827 | |
| 828 l = 0; | |
| 829 r = cmap->rlen - 1; | |
| 830 while (l <= r) | |
| 831 { | |
| 832 m = (l + r) >> 1; | |
| 833 if (cpt < ranges[m].low) | |
| 834 r = m - 1; | |
| 835 else if (cpt > ranges[m].high) | |
| 836 l = m + 1; | |
| 837 else | |
| 838 return cpt - ranges[m].low + ranges[m].out; | |
| 839 } | |
| 840 | |
| 841 l = 0; | |
| 842 r = cmap->xlen - 1; | |
| 843 while (l <= r) | |
| 844 { | |
| 845 m = (l + r) >> 1; | |
| 846 if (cpt < xranges[m].low) | |
| 847 r = m - 1; | |
| 848 else if (cpt > xranges[m].high) | |
| 849 l = m + 1; | |
| 850 else | |
| 851 return cpt - xranges[m].low + xranges[m].out; | |
| 852 } | |
| 853 | |
| 854 if (cmap->usecmap) | |
| 855 return pdf_lookup_cmap(cmap->usecmap, cpt); | |
| 856 | |
| 857 return -1; | |
| 858 } | |
| 859 | |
| 860 int | |
| 861 pdf_lookup_cmap_full(pdf_cmap *cmap, unsigned int cpt, int *out) | |
| 862 { | |
| 863 pdf_range *ranges = cmap->ranges; | |
| 864 pdf_xrange *xranges = cmap->xranges; | |
| 865 pdf_mrange *mranges = cmap->mranges; | |
| 866 unsigned int i; | |
| 867 int l, r, m; | |
| 868 | |
| 869 l = 0; | |
| 870 r = cmap->rlen - 1; | |
| 871 while (l <= r) | |
| 872 { | |
| 873 m = (l + r) >> 1; | |
| 874 if (cpt < ranges[m].low) | |
| 875 r = m - 1; | |
| 876 else if (cpt > ranges[m].high) | |
| 877 l = m + 1; | |
| 878 else | |
| 879 { | |
| 880 out[0] = cpt - ranges[m].low + ranges[m].out; | |
| 881 return 1; | |
| 882 } | |
| 883 } | |
| 884 | |
| 885 l = 0; | |
| 886 r = cmap->xlen - 1; | |
| 887 while (l <= r) | |
| 888 { | |
| 889 m = (l + r) >> 1; | |
| 890 if (cpt < xranges[m].low) | |
| 891 r = m - 1; | |
| 892 else if (cpt > xranges[m].high) | |
| 893 l = m + 1; | |
| 894 else | |
| 895 { | |
| 896 out[0] = cpt - xranges[m].low + xranges[m].out; | |
| 897 return 1; | |
| 898 } | |
| 899 } | |
| 900 | |
| 901 l = 0; | |
| 902 r = cmap->mlen - 1; | |
| 903 while (l <= r) | |
| 904 { | |
| 905 m = (l + r) >> 1; | |
| 906 if (cpt < mranges[m].low) | |
| 907 r = m - 1; | |
| 908 else if (cpt > mranges[m].low) | |
| 909 l = m + 1; | |
| 910 else | |
| 911 { | |
| 912 int *ptr = &cmap->dict[cmap->mranges[m].out]; | |
| 913 unsigned int len = (unsigned int)*ptr++; | |
| 914 for (i = 0; i < len; ++i) | |
| 915 out[i] = *ptr++; | |
| 916 return len; | |
| 917 } | |
| 918 } | |
| 919 | |
| 920 if (cmap->usecmap) | |
| 921 return pdf_lookup_cmap_full(cmap->usecmap, cpt, out); | |
| 922 | |
| 923 return 0; | |
| 924 } | |
| 925 | |
| 926 int | |
| 927 pdf_decode_cmap(pdf_cmap *cmap, unsigned char *buf, unsigned char *end, unsigned int *cpt) | |
| 928 { | |
| 929 unsigned int c; | |
| 930 int k, n; | |
| 931 int len = end - buf; | |
| 932 | |
| 933 if (len > 4) | |
| 934 len = 4; | |
| 935 | |
| 936 c = 0; | |
| 937 for (n = 0; n < len; n++) | |
| 938 { | |
| 939 c = (c << 8) | buf[n]; | |
| 940 for (k = 0; k < cmap->codespace_len; k++) | |
| 941 { | |
| 942 if (cmap->codespace[k].n == n + 1) | |
| 943 { | |
| 944 if (c >= cmap->codespace[k].low && c <= cmap->codespace[k].high) | |
| 945 { | |
| 946 *cpt = c; | |
| 947 return n + 1; | |
| 948 } | |
| 949 } | |
| 950 } | |
| 951 } | |
| 952 | |
| 953 *cpt = 0; | |
| 954 return 1; | |
| 955 } | |
| 956 | |
| 957 size_t | |
| 958 pdf_cmap_size(fz_context *ctx, pdf_cmap *cmap) | |
| 959 { | |
| 960 if (cmap == NULL) | |
| 961 return 0; | |
| 962 if (cmap->storable.refs < 0) | |
| 963 return 0; | |
| 964 | |
| 965 return pdf_cmap_size(ctx, cmap->usecmap) + | |
| 966 cmap->rcap * sizeof *cmap->ranges + | |
| 967 cmap->xcap * sizeof *cmap->xranges + | |
| 968 cmap->mcap * sizeof *cmap->mranges + | |
| 969 cmap->tcap * sizeof *cmap->tree + | |
| 970 sizeof(*cmap); | |
| 971 } |
