Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/source/fitz/bidi.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /* | |
| 2 * Bidirectional text processing. | |
| 3 * | |
| 4 * Processes unicode text by arranging the characters into an order suitable | |
| 5 * for display. E.g. Hebrew text will be arranged from right-to-left and | |
| 6 * any English within the text will remain in the left-to-right order. | |
| 7 * Characters such as parentheses will be substituted for their mirrored | |
| 8 * equivalents if they are part of text which must be reversed. | |
| 9 * | |
| 10 * This is an implementation of the unicode Bidirectional Algorithm which | |
| 11 * can be found here: http://www.unicode.org/reports/tr9/ and is based | |
| 12 * on the reference implementation of the algorithm found on that page. | |
| 13 * | |
| 14 * For a nice overview of how it works, read this... | |
| 15 * http://www.w3.org/TR/REC-html40/struct/dirlang.html | |
| 16 * | |
| 17 * Extracted from the SmartOffice code, where it was modified by Ian | |
| 18 * Beveridge. | |
| 19 * | |
| 20 * Copyright (C) Picsel, 2004. All Rights Reserved. | |
| 21 */ | |
| 22 | |
| 23 /* | |
| 24 * Original copyright notice from unicode reference implementation. | |
| 25 * ---------------------------------------------------------------- | |
| 26 * Written by: Asmus Freytag | |
| 27 * C++ and Windows dependencies removed, and | |
| 28 * command line interface added by: Rick McGowan | |
| 29 * | |
| 30 * Copyright (C) 1999, ASMUS, Inc. All Rights Reserved | |
| 31 */ | |
| 32 | |
| 33 /* | |
| 34 * Includes... | |
| 35 */ | |
| 36 | |
| 37 #include "mupdf/fitz.h" | |
| 38 #include "mupdf/ucdn.h" | |
| 39 #include "bidi-imp.h" /* standard bidi code interface */ | |
| 40 #include <assert.h> | |
| 41 | |
| 42 /* | |
| 43 * Macros... | |
| 44 */ | |
| 45 | |
/* Yields 1 when the lowest bit of x is set (x is odd), 0 otherwise. */
#define ODD(x) ((x) & 1)

/* Non-zero when t is one of the classes ES, ET, CS, NSM, PDF, BN, S, WS
 * or N. The argument may be evaluated several times, so it must not have
 * side effects. */
#define REPLACEABLE_TYPE(t) ( \
	((t)==BDI_ES) || \
	((t)==BDI_ET) || \
	((t)==BDI_CS) || \
	((t)==BDI_NSM) || \
	((t)==BDI_PDF) || \
	((t)==BDI_BN) || \
	((t)==BDI_S) || \
	((t)==BDI_WS) || \
	((t)==BDI_N) )
| 52 | |
/* Verbose tracing. The argument is a complete, parenthesised fz_warn
 * argument list, e.g. DBUGVF((ctx, "fmt", value)). Expands to an empty
 * statement unless DEBUG_BIDI_VERBOSE is defined. */
#if defined(DEBUG_BIDI_VERBOSE)
#define DBUGVF(params) do { fz_warn params; } while (0)
#else
#define DBUGVF(params) do {} while (0)
#endif

/* Outline (entry-point) tracing. Same calling convention as DBUGVF, but
 * controlled by DEBUG_BIDI_OUTLINE. */
#if defined(DEBUG_BIDI_OUTLINE)
#define DBUGH(params) do { fz_warn params; } while (0)
#else
#define DBUGH(params) do {} while (0)
#endif
| 64 | |
/* String terminator. */
#define UNICODE_EOS					0

/* ASCII digits and Latin-1 superscripts. */
#define UNICODE_DIGIT_ZERO				0x0030
#define UNICODE_DIGIT_NINE				0x0039
#define UNICODE_SUPERSCRIPT_TWO				0x00B2
#define UNICODE_SUPERSCRIPT_THREE			0x00B3
#define UNICODE_SUPERSCRIPT_ONE				0x00B9

/* Bounds of the code point range this file labels as RTL. */
#define UNICODE_RTL_START				0x0590
#define UNICODE_RTL_END					0x07BF

/* Arabic-Indic digit ranges. */
#define UNICODE_ARABIC_INDIC_DIGIT_ZERO			0x0660
#define UNICODE_ARABIC_INDIC_DIGIT_NINE			0x0669
#define UNICODE_EXTENDED_ARABIC_INDIC_DIGIT_ZERO	0x06F0
#define UNICODE_EXTENDED_ARABIC_INDIC_DIGIT_NINE	0x06F9

#define UNICODE_ZERO_WIDTH_NON_JOINER			0x200C

/* Superscript / subscript digits. */
#define UNICODE_SUPERSCRIPT_ZERO			0x2070
#define UNICODE_SUPERSCRIPT_FOUR			0x2074
#define UNICODE_SUPERSCRIPT_NINE			0x2079
#define UNICODE_SUBSCRIPT_ZERO				0x2080
#define UNICODE_SUBSCRIPT_NINE				0x2089

/* Enclosed and full-width numbers. */
#define UNICODE_CIRCLED_DIGIT_ONE			0x2460
#define UNICODE_NUMBER_TWENTY_FULL_STOP			0x249B
#define UNICODE_CIRCLED_DIGIT_ZERO			0x24EA
#define UNICODE_FULLWIDTH_DIGIT_ZERO			0xFF10
#define UNICODE_FULLWIDTH_DIGIT_NINE			0xFF19

/* Boolean constants, unless something included earlier already
 * provides them. */
#if !defined(TRUE)
#define TRUE (1)
#endif
#if !defined(FALSE)
#define FALSE (0)
#endif
| 95 | |
| 96 /* | |
| 97 * Enumerations... | |
| 98 */ | |
| 99 | |
#if defined(DEBUG_BIDI_VERBOSE)
/* Debug display support: one printable character per bidi character
 * type, indexed by the type value (see the "Types:" dump produced by
 * classify_characters). */
static const char char_from_types[] =
{
	' ',	/* ON */
	'>',	/* L */
	'<',	/* R */
	'9',	/* AN */
	'1',	/* EN */
	'a',	/* AL */
	'@',	/* NSM */
	'.',	/* CS */
	',',	/* ES */
	'$',	/* ET */
	':',	/* BN */
	'X',	/* S */
	'_',	/* WS */
	'B',	/* B */
	'+',	/* RLO */
	'+',	/* RLE */
	'+',	/* LRO */
	'+',	/* LRE */
	'-',	/* PDF */
	'='	/* LS */
};
#endif
| 126 | |
| 127 /* | |
| 128 * Functions and static functions... | |
| 129 */ | |
| 130 | |
| 131 /* UCDN uses a different ordering than Bidi does. We cannot | |
| 132 * change to the UCDN ordering, as the bidi-std.c code relies | |
| 133 * on the exact ordering (at least that N = ON = 0). We | |
| 134 * therefore map between the two using this small table. It | |
| 135 * also takes care of fudging LRI, RLI, FSI and PDI, that this | |
| 136 * code does not currently support. */ | |
| 137 static const uint8_t ucdn_to_bidi[] = | |
| 138 { | |
| 139 BDI_L, /* UCDN_BIDI_CLASS_L = 0 */ | |
| 140 BDI_LRE, /* UCDN_BIDI_CLASS_LRE = 1 */ | |
| 141 BDI_LRO, /* UCDN_BIDI_CLASS_LRO = 2 */ | |
| 142 BDI_R, /* UCDN_BIDI_CLASS_R = 3 */ | |
| 143 BDI_AL, /* UCDN_BIDI_CLASS_AL = 4 */ | |
| 144 BDI_RLE, /* UCDN_BIDI_CLASS_RLE = 5 */ | |
| 145 BDI_RLO, /* UCDN_BIDI_CLASS_RLO = 6 */ | |
| 146 BDI_PDF, /* UCDN_BIDI_CLASS_PDF = 7 */ | |
| 147 BDI_EN, /* UCDN_BIDI_CLASS_EN = 8 */ | |
| 148 BDI_ES, /* UCDN_BIDI_CLASS_ES = 9 */ | |
| 149 BDI_ET, /* UCDN_BIDI_CLASS_ET = 10 */ | |
| 150 BDI_AN, /* UCDN_BIDI_CLASS_AN = 11 */ | |
| 151 BDI_CS, /* UCDN_BIDI_CLASS_CS = 12 */ | |
| 152 BDI_NSM, /* UCDN_BIDI_CLASS_NSM = 13 */ | |
| 153 BDI_BN, /* UCDN_BIDI_CLASS_BN = 14 */ | |
| 154 BDI_B, /* UCDN_BIDI_CLASS_B = 15 */ | |
| 155 BDI_S, /* UCDN_BIDI_CLASS_S = 16 */ | |
| 156 BDI_WS, /* UCDN_BIDI_CLASS_WS = 17 */ | |
| 157 BDI_ON, /* UCDN_BIDI_CLASS_ON = 18 */ | |
| 158 BDI_LRE, /* UCDN_BIDI_CLASS_LRI = 19 */ | |
| 159 BDI_RLE, /* UCDN_BIDI_CLASS_RLI = 20 */ | |
| 160 BDI_N, /* UCDN_BIDI_CLASS_FSI = 21 */ | |
| 161 BDI_N, /* UCDN_BIDI_CLASS_PDI = 22 */ | |
| 162 }; | |
| 163 | |
| 164 #define class_from_ch_ws(ch) (ucdn_to_bidi[ucdn_get_bidi_class(ch)]) | |
| 165 | |
| 166 /* Return a direction for white-space on the second pass of the algorithm. */ | |
| 167 static fz_bidi_chartype class_from_ch_n(uint32_t ch) | |
| 168 { | |
| 169 fz_bidi_chartype from_ch_ws = class_from_ch_ws(ch); | |
| 170 if (from_ch_ws == BDI_S || from_ch_ws == BDI_WS) | |
| 171 return BDI_N; | |
| 172 return from_ch_ws; | |
| 173 } | |
| 174 | |
| 175 /* Split fragments into single scripts (or punctuation + single script) */ | |
| 176 static void | |
| 177 split_at_script(const uint32_t *fragment, | |
| 178 size_t fragment_len, | |
| 179 int level, | |
| 180 void *arg, | |
| 181 fz_bidi_fragment_fn *callback) | |
| 182 { | |
| 183 int script = UCDN_SCRIPT_COMMON; | |
| 184 size_t script_start, i; | |
| 185 | |
| 186 script_start = 0; | |
| 187 for (i = 0; i < fragment_len; i++) | |
| 188 { | |
| 189 int s = ucdn_get_script(fragment[i]); | |
| 190 if (s == UCDN_SCRIPT_COMMON || s == UCDN_SCRIPT_INHERITED) | |
| 191 { | |
| 192 /* Punctuation etc. This is fine. */ | |
| 193 } | |
| 194 else if (s == script) | |
| 195 { | |
| 196 /* Same script. Still fine. */ | |
| 197 } | |
| 198 else if (script == UCDN_SCRIPT_COMMON || script == UCDN_SCRIPT_INHERITED) | |
| 199 { | |
| 200 /* First non punctuation thing. Set the script. */ | |
| 201 script = s; | |
| 202 } | |
| 203 else | |
| 204 { | |
| 205 /* Change of script. Break the fragment. */ | |
| 206 (*callback)(&fragment[script_start], i - script_start, level, script, arg); | |
| 207 script_start = i; | |
| 208 script = s; | |
| 209 } | |
| 210 } | |
| 211 if (script_start != fragment_len) | |
| 212 { | |
| 213 (*callback)(&fragment[script_start], fragment_len - script_start, level, script, arg); | |
| 214 } | |
| 215 } | |
| 216 | |
| 217 /* Determines the character classes for all following | |
| 218 * passes of the algorithm. A character class is basically the type of Bidi | |
| 219 * behaviour that the character exhibits. | |
| 220 */ | |
| 221 static void | |
| 222 classify_characters(const uint32_t *text, | |
| 223 fz_bidi_chartype *types, | |
| 224 size_t len, | |
| 225 fz_bidi_flags flags) | |
| 226 { | |
| 227 size_t i; | |
| 228 | |
| 229 if ((flags & FZ_BIDI_CLASSIFY_WHITE_SPACE)!=0) | |
| 230 { | |
| 231 for (i = 0; i < len; i++) | |
| 232 { | |
| 233 types[i] = class_from_ch_ws(text[i]); | |
| 234 } | |
| 235 } | |
| 236 else | |
| 237 { | |
| 238 #ifdef DEBUG_BIDI_VERBOSE | |
| 239 fprintf(stderr, "Text: "); | |
| 240 for (i = 0; i < len; i++) | |
| 241 { | |
| 242 /* So that we can actually sort of read the debug string, any | |
| 243 * non-ascii characters are replaced with a 1-digit hash | |
| 244 * value from 0-9, making non-english characters appear | |
| 245 * as numbers | |
| 246 */ | |
| 247 fprintf(stderr, "%c", (text[i] <= 127 && text[i] >= 32) ? | |
| 248 text[i] : text[i] % 9 + '0'); | |
| 249 } | |
| 250 fprintf(stderr, "\nTypes: "); | |
| 251 #endif | |
| 252 for (i = 0; i < len; i++) | |
| 253 { | |
| 254 types[i] = class_from_ch_n(text[i]); | |
| 255 #ifdef DEBUG_BIDI_VERBOSE | |
| 256 fprintf(stderr, "%c", char_from_types[(int)types[i]]); | |
| 257 #endif | |
| 258 } | |
| 259 #ifdef DEBUG_BIDI_VERBOSE | |
| 260 fprintf(stderr, "\n"); | |
| 261 #endif | |
| 262 } | |
| 263 } | |
| 264 | |
| 265 /* Determines the base level of the text. | |
| 266 * Implements rule P2 of the Unicode Bidi Algorithm. | |
| 267 * Note: Ignores explicit embeddings | |
| 268 */ | |
| 269 static fz_bidi_level base_level_from_text(fz_bidi_chartype *types, size_t len) | |
| 270 { | |
| 271 size_t i; | |
| 272 | |
| 273 for (i = 0; i < len; i++) | |
| 274 { | |
| 275 switch (types[i]) | |
| 276 { | |
| 277 /* strong left */ | |
| 278 case BDI_L: | |
| 279 return FZ_BIDI_LTR; | |
| 280 | |
| 281 /* strong right */ | |
| 282 case BDI_R: | |
| 283 case BDI_AL: | |
| 284 return FZ_BIDI_RTL; | |
| 285 } | |
| 286 } | |
| 287 return FZ_BIDI_LTR; | |
| 288 } | |
| 289 | |
| 290 static fz_bidi_direction direction_from_type(fz_bidi_chartype type) | |
| 291 { | |
| 292 switch (type) | |
| 293 { | |
| 294 case BDI_L: | |
| 295 case BDI_EN: | |
| 296 return FZ_BIDI_LTR; | |
| 297 | |
| 298 case BDI_R: | |
| 299 case BDI_AL: | |
| 300 return FZ_BIDI_RTL; | |
| 301 | |
| 302 default: | |
| 303 return FZ_BIDI_NEUTRAL; | |
| 304 } | |
| 305 } | |
| 306 | |
| 307 static void | |
| 308 classify_quoted_blocks(const uint32_t *text, | |
| 309 fz_bidi_chartype *types, | |
| 310 size_t len) | |
| 311 { | |
| 312 size_t i; | |
| 313 int inQuote = FALSE; | |
| 314 int pdfNeeded = FALSE; | |
| 315 int ltrFound = FALSE; | |
| 316 int rtlFound = FALSE; | |
| 317 | |
| 318 /* Only do anything special here if there is mixed content | |
| 319 * (LTR *and* RTL) in the text. | |
| 320 */ | |
| 321 for (i = 0; i < len; i++) | |
| 322 { | |
| 323 switch (direction_from_type(types[i])) | |
| 324 { | |
| 325 case FZ_BIDI_LTR: | |
| 326 ltrFound = TRUE; | |
| 327 break; | |
| 328 | |
| 329 case FZ_BIDI_RTL: | |
| 330 rtlFound = TRUE; | |
| 331 break; | |
| 332 | |
| 333 default: | |
| 334 break; | |
| 335 } | |
| 336 } | |
| 337 | |
| 338 /* Only make any changes if *both* LTR and RTL characters exist | |
| 339 * in this text. | |
| 340 */ | |
| 341 if (!ltrFound || !rtlFound) | |
| 342 { | |
| 343 return; | |
| 344 } | |
| 345 | |
| 346 for (i = 0; i < len; i++) | |
| 347 { | |
| 348 if (text[i]=='"') | |
| 349 { | |
| 350 /* If we're already in a quote then terminate it, | |
| 351 * else start a new block. | |
| 352 */ | |
| 353 if (inQuote) | |
| 354 { | |
| 355 inQuote = FALSE; | |
| 356 if (pdfNeeded) | |
| 357 { | |
| 358 pdfNeeded = FALSE; | |
| 359 types[i] = BDI_PDF; | |
| 360 } | |
| 361 } | |
| 362 else | |
| 363 { | |
| 364 size_t j; | |
| 365 int done = FALSE; | |
| 366 | |
| 367 inQuote = TRUE; | |
| 368 | |
| 369 /* Find the first strong right or left type and | |
| 370 * use that to determine whether we should classify | |
| 371 * the quote as LRE or RLE. Or neither, if we | |
| 372 * hit another quote before any strongly-directional | |
| 373 * character. | |
| 374 */ | |
| 375 for (j = i + 1; !done && (j < len) && text[j] != '"'; ++j) | |
| 376 { | |
| 377 switch(types[j]) | |
| 378 { | |
| 379 case BDI_RLE: | |
| 380 case BDI_LRE: | |
| 381 done = TRUE; | |
| 382 break; | |
| 383 | |
| 384 case BDI_L: | |
| 385 case BDI_EN: | |
| 386 types[i] = BDI_LRE; | |
| 387 pdfNeeded = TRUE; | |
| 388 done = TRUE; | |
| 389 break; | |
| 390 | |
| 391 case BDI_R: | |
| 392 case BDI_AL: | |
| 393 types[i] = BDI_RLE; | |
| 394 pdfNeeded = TRUE; | |
| 395 done = TRUE; | |
| 396 break; | |
| 397 | |
| 398 default: | |
| 399 break; | |
| 400 } | |
| 401 } | |
| 402 } | |
| 403 } | |
| 404 } | |
| 405 } | |
| 406 | |
| 407 /* Creates a buffer with an embedding level for every character in the | |
| 408 * given text. Also determines the base level and returns it in | |
| 409 * *baseDir if *baseDir does not initially contain a valid direction. | |
| 410 */ | |
| 411 static fz_bidi_level * | |
| 412 create_levels(fz_context *ctx, | |
| 413 const uint32_t *text, | |
| 414 size_t len, | |
| 415 fz_bidi_direction *baseDir, | |
| 416 int resolveWhiteSpace, | |
| 417 int flags) | |
| 418 { | |
| 419 fz_bidi_level *levels, *plevels; | |
| 420 fz_bidi_chartype *types = NULL; | |
| 421 fz_bidi_chartype *ptypes; | |
| 422 fz_bidi_level baseLevel; | |
| 423 const uint32_t *ptext; | |
| 424 size_t plen, remaining; | |
| 425 | |
| 426 levels = Memento_label(fz_malloc(ctx, len * sizeof(*levels)), "bidi_levels"); | |
| 427 | |
| 428 fz_var(types); | |
| 429 | |
| 430 fz_try(ctx) | |
| 431 { | |
| 432 types = fz_malloc(ctx, len * sizeof(fz_bidi_chartype)); | |
| 433 | |
| 434 classify_characters(text, types, len, flags); | |
| 435 | |
| 436 if (*baseDir != FZ_BIDI_LTR && *baseDir != FZ_BIDI_RTL) | |
| 437 { | |
| 438 /* Derive the base level from the text and | |
| 439 * update *baseDir in case the caller wants to know. | |
| 440 */ | |
| 441 baseLevel = base_level_from_text(types, len); | |
| 442 *baseDir = ODD(baseLevel)==1 ? FZ_BIDI_RTL : FZ_BIDI_LTR; | |
| 443 } | |
| 444 else | |
| 445 { | |
| 446 baseLevel = (fz_bidi_level)*baseDir; | |
| 447 } | |
| 448 | |
| 449 { | |
| 450 /* Replace tab with base direction, i.e. make tab appear as | |
| 451 * 'strong left' if the base direction is left-to-right and | |
| 452 * 'strong right' if base direction is right-to-left. This | |
| 453 * allows Layout to implicitly treat tabs as 'segment separators'. | |
| 454 */ | |
| 455 size_t i; | |
| 456 | |
| 457 for (i = 0u; i < len; i++) | |
| 458 { | |
| 459 if (text[i]=='\t') | |
| 460 { | |
| 461 types[i] = (*baseDir == FZ_BIDI_RTL) ? BDI_R : BDI_L; | |
| 462 } | |
| 463 } | |
| 464 } | |
| 465 | |
| 466 /* Look for quotation marks. Classify them as RLE or LRE | |
| 467 * or leave them alone, depending on what follows them. | |
| 468 */ | |
| 469 classify_quoted_blocks(text, types, len); | |
| 470 | |
| 471 /* Work one paragraph at a time. */ | |
| 472 plevels = levels; | |
| 473 ptypes = types; | |
| 474 ptext = text; | |
| 475 remaining = len; | |
| 476 while (remaining) | |
| 477 { | |
| 478 plen = fz_bidi_resolve_paragraphs(ptypes, remaining); | |
| 479 | |
| 480 /* Work out the levels and character types... */ | |
| 481 (void)fz_bidi_resolve_explicit(baseLevel, BDI_N, ptypes, plevels, plen, 0); | |
| 482 fz_bidi_resolve_weak(ctx, baseLevel, ptypes, plevels, plen); | |
| 483 fz_bidi_resolve_neutrals(baseLevel, ptypes, plevels, plen); | |
| 484 fz_bidi_resolve_implicit(ptypes, plevels, plen); | |
| 485 | |
| 486 classify_characters(ptext, ptypes, plen, FZ_BIDI_CLASSIFY_WHITE_SPACE); | |
| 487 | |
| 488 if (resolveWhiteSpace) | |
| 489 { | |
| 490 /* resolve whitespace */ | |
| 491 fz_bidi_resolve_whitespace(baseLevel, ptypes, plevels, plen); | |
| 492 } | |
| 493 | |
| 494 plevels += plen; | |
| 495 ptypes += plen; | |
| 496 ptext += plen; | |
| 497 remaining -= plen; | |
| 498 } | |
| 499 | |
| 500 /* The levels buffer now has odd and even numbers indicating | |
| 501 * rtl or ltr characters, respectively. | |
| 502 */ | |
| 503 #ifdef DEBUG_BIDI_VERBOSE | |
| 504 fprintf(stderr, "Levels: "); | |
| 505 { | |
| 506 size_t i; | |
| 507 for (i = 0; i < len; i++) | |
| 508 { | |
| 509 fprintf(stderr, "%d", levels[i]>9?0:levels[i]); | |
| 510 } | |
| 511 fprintf(stderr, "\n"); | |
| 512 } | |
| 513 #endif | |
| 514 } | |
| 515 fz_always(ctx) | |
| 516 { | |
| 517 fz_free(ctx, types); | |
| 518 } | |
| 519 fz_catch(ctx) | |
| 520 { | |
| 521 fz_free(ctx, levels); | |
| 522 fz_rethrow(ctx); | |
| 523 } | |
| 524 return levels; | |
| 525 } | |
| 526 | |
| 527 /* Partitions the given character sequence into one or more unidirectional | |
| 528 * fragments and invokes the given callback function for each fragment. | |
| 529 */ | |
| 530 void fz_bidi_fragment_text(fz_context *ctx, | |
| 531 const uint32_t *text, | |
| 532 size_t textlen, | |
| 533 fz_bidi_direction *baseDir, | |
| 534 fz_bidi_fragment_fn *callback, | |
| 535 void *arg, | |
| 536 int flags) | |
| 537 { | |
| 538 size_t startOfFragment; | |
| 539 size_t i; | |
| 540 fz_bidi_level *levels; | |
| 541 | |
| 542 if (text == NULL || callback == NULL || textlen == 0) | |
| 543 return; | |
| 544 | |
| 545 DBUGH((ctx, "fz_bidi_fragment_text('%S', len = %d)\n", text, textlen)); | |
| 546 | |
| 547 levels = create_levels(ctx, text, textlen, baseDir, FALSE, flags); | |
| 548 | |
| 549 /* We now have an array with an embedding level | |
| 550 * for each character in text. | |
| 551 */ | |
| 552 assert(levels != NULL); | |
| 553 | |
| 554 fz_try(ctx) | |
| 555 { | |
| 556 startOfFragment = 0; | |
| 557 for (i = 1; i < textlen; i++) | |
| 558 { | |
| 559 if (levels[i] != levels[i-1]) | |
| 560 { | |
| 561 /* We've gone past the end of the fragment. | |
| 562 * Create a text object for it, then start | |
| 563 * a new fragment. | |
| 564 */ | |
| 565 split_at_script(&text[startOfFragment], | |
| 566 i - startOfFragment, | |
| 567 levels[startOfFragment], | |
| 568 arg, | |
| 569 callback); | |
| 570 startOfFragment = i; | |
| 571 } | |
| 572 } | |
| 573 /* Now i == textlen. Deal with the final (or maybe only) fragment. */ | |
| 574 /* otherwise create 1 fragment */ | |
| 575 split_at_script(&text[startOfFragment], | |
| 576 i - startOfFragment, | |
| 577 levels[startOfFragment], | |
| 578 arg, | |
| 579 callback); | |
| 580 } | |
| 581 fz_always(ctx) | |
| 582 { | |
| 583 fz_free(ctx, levels); | |
| 584 } | |
| 585 fz_catch(ctx) | |
| 586 { | |
| 587 fz_rethrow(ctx); | |
| 588 } | |
| 589 } |
