Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/source/fitz/ucdn.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /* | |
| 2 * Copyright (C) 2012 Grigori Goronzy <greg@kinoho.net> | |
| 3 * | |
| 4 * Permission to use, copy, modify, and/or distribute this software for any | |
| 5 * purpose with or without fee is hereby granted, provided that the above | |
| 6 * copyright notice and this permission notice appear in all copies. | |
| 7 * | |
| 8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | |
| 9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | |
| 10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | |
| 11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | |
| 12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | |
| 13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | |
| 14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | |
| 15 */ | |
| 16 | |
| 17 #include "mupdf/fitz.h" | |
| 18 #include "mupdf/ucdn.h" | |
| 19 | |
| 20 #include <stdio.h> | |
| 21 #include <stdlib.h> | |
| 22 | |
/*
 * Per-code-point property record. Records are stored in ucd_records[]
 * and are located through get_ucd_record(). Field order matters: the
 * generated tables initialise these records positionally.
 */
typedef struct {
	unsigned char category;         /* returned by ucdn_get_general_category() */
	unsigned char combining;        /* returned by ucdn_get_combining_class() */
	unsigned char bidi_class;       /* returned by ucdn_get_bidi_class() */
	unsigned char east_asian_width; /* returned by ucdn_get_east_asian_width() */
	unsigned char script;           /* returned by ucdn_get_script() */
	unsigned char linebreak_class;  /* returned by ucdn_get_linebreak_class() */
} UCDRecord;
| 31 | |
/*
 * One entry of the mirror_pairs[] table searched by ucdn_mirror():
 * a code unit and the code unit it mirrors to.
 */
typedef struct {
	unsigned short from;
	unsigned short to;
} MirrorPair;
| 35 | |
/*
 * One entry of the bracket_pairs[] table searched by search_bp():
 * a bracket, its counterpart, and the paired-bracket type reported by
 * ucdn_paired_bracket_type().
 */
typedef struct {
	unsigned short from;
	unsigned short to;
	unsigned char type;
} BracketPair;
| 40 | |
/*
 * One range of the composition re-indexing tables (nfc_first/nfc_last).
 * compare_reindex() treats a range as covering start .. start + count
 * inclusive; get_comp_index() maps a code inside the range to
 * index + (code - start).
 */
typedef struct {
	unsigned int start;
	short count;
	short index;
} Reindex;
| 45 | |
| 46 #include "ucdn_db.h" | |
| 47 | |
/*
 * Constants required for algorithmic Hangul (de)composition.
 * xBASE is the first code point of a range (TBASE is one below the first
 * trailing consonant, so a T index of 0 means "no trailing consonant");
 * xCOUNT is the number of entries in that range.
 */
#define SBASE  0xAC00                /* first precomposed syllable */
#define LBASE  0x1100                /* first leading consonant */
#define VBASE  0x1161                /* first vowel */
#define TBASE  0x11A7                /* one before the first trailing consonant */
#define LCOUNT 19
#define VCOUNT 21
#define TCOUNT 28
#define NCOUNT (VCOUNT * TCOUNT)     /* syllables per leading consonant: 588 */
#define SCOUNT (LCOUNT * NCOUNT)     /* total precomposed syllables: 11172 */
| 58 | |
| 59 static const UCDRecord *get_ucd_record(uint32_t code) | |
| 60 { | |
| 61 int index, offset; | |
| 62 | |
| 63 if (code >= 0x110000) | |
| 64 index = 0; | |
| 65 else { | |
| 66 index = index0[code >> (SHIFT1+SHIFT2)] << SHIFT1; | |
| 67 offset = (code >> SHIFT2) & ((1<<SHIFT1) - 1); | |
| 68 index = index1[index + offset] << SHIFT2; | |
| 69 offset = code & ((1<<SHIFT2) - 1); | |
| 70 index = index2[index + offset]; | |
| 71 } | |
| 72 | |
| 73 return &ucd_records[index]; | |
| 74 } | |
| 75 | |
| 76 static const unsigned short *get_decomp_record(uint32_t code) | |
| 77 { | |
| 78 int index, offset; | |
| 79 | |
| 80 if (code >= 0x110000) | |
| 81 index = 0; | |
| 82 else { | |
| 83 index = decomp_index0[code >> (DECOMP_SHIFT1+DECOMP_SHIFT2)] | |
| 84 << DECOMP_SHIFT1; | |
| 85 offset = (code >> DECOMP_SHIFT2) & ((1<<DECOMP_SHIFT1) - 1); | |
| 86 index = decomp_index1[index + offset] << DECOMP_SHIFT2; | |
| 87 offset = code & ((1<<DECOMP_SHIFT2) - 1); | |
| 88 index = decomp_index2[index + offset]; | |
| 89 } | |
| 90 | |
| 91 return &decomp_data[index]; | |
| 92 } | |
| 93 | |
| 94 static int compare_reindex(const void *a, const void *b) | |
| 95 { | |
| 96 Reindex *ra = (Reindex *)a; | |
| 97 Reindex *rb = (Reindex *)b; | |
| 98 | |
| 99 if (ra->start < rb->start) | |
| 100 return -1; | |
| 101 else if (ra->start > (rb->start + rb->count)) | |
| 102 return 1; | |
| 103 else | |
| 104 return 0; | |
| 105 } | |
| 106 | |
| 107 static int get_comp_index(uint32_t code, const Reindex *idx, size_t len) | |
| 108 { | |
| 109 Reindex *res; | |
| 110 Reindex r = {0, 0, 0}; | |
| 111 r.start = code; | |
| 112 res = (Reindex *) bsearch(&r, idx, len, sizeof(Reindex), compare_reindex); | |
| 113 | |
| 114 if (res != NULL) | |
| 115 return res->index + (code - res->start); | |
| 116 else | |
| 117 return -1; | |
| 118 } | |
| 119 | |
| 120 static int compare_mp(const void *a, const void *b) | |
| 121 { | |
| 122 MirrorPair *mpa = (MirrorPair *)a; | |
| 123 MirrorPair *mpb = (MirrorPair *)b; | |
| 124 return mpa->from - mpb->from; | |
| 125 } | |
| 126 | |
| 127 static int compare_bp(const void *a, const void *b) | |
| 128 { | |
| 129 BracketPair *bpa = (BracketPair *)a; | |
| 130 BracketPair *bpb = (BracketPair *)b; | |
| 131 return bpa->from - bpb->from; | |
| 132 } | |
| 133 | |
| 134 static BracketPair *search_bp(uint32_t code) | |
| 135 { | |
| 136 BracketPair bp = {0,0,2}; | |
| 137 BracketPair *res; | |
| 138 | |
| 139 bp.from = code; | |
| 140 res = (BracketPair *) bsearch(&bp, bracket_pairs, BIDI_BRACKET_LEN, | |
| 141 sizeof(BracketPair), compare_bp); | |
| 142 return res; | |
| 143 } | |
| 144 | |
| 145 static int hangul_pair_decompose(uint32_t code, uint32_t *a, uint32_t *b) | |
| 146 { | |
| 147 int si = code - SBASE; | |
| 148 | |
| 149 if (si < 0 || si >= SCOUNT) | |
| 150 return 0; | |
| 151 | |
| 152 if (si % TCOUNT) { | |
| 153 /* LV,T */ | |
| 154 *a = SBASE + (si / TCOUNT) * TCOUNT; | |
| 155 *b = TBASE + (si % TCOUNT); | |
| 156 return 3; | |
| 157 } else { | |
| 158 /* L,V */ | |
| 159 *a = LBASE + (si / NCOUNT); | |
| 160 *b = VBASE + (si % NCOUNT) / TCOUNT; | |
| 161 return 2; | |
| 162 } | |
| 163 } | |
| 164 | |
| 165 static int hangul_pair_compose(uint32_t *code, uint32_t a, uint32_t b) | |
| 166 { | |
| 167 if (a >= SBASE && a < (SBASE + SCOUNT) && b >= TBASE && b < (TBASE + TCOUNT)) { | |
| 168 /* LV,T */ | |
| 169 *code = a + (b - TBASE); | |
| 170 return 3; | |
| 171 } else if (a >= LBASE && a < (LBASE + LCOUNT) && b >= VBASE && b < (VBASE + VCOUNT)) { | |
| 172 /* L,V */ | |
| 173 int li = a - LBASE; | |
| 174 int vi = b - VBASE; | |
| 175 *code = SBASE + li * NCOUNT + vi * TCOUNT; | |
| 176 return 2; | |
| 177 } else { | |
| 178 return 0; | |
| 179 } | |
| 180 } | |
| 181 | |
/*
 * Decode one code point from a UTF-16 sequence and advance *code_ptr
 * past the units consumed.
 *
 * A unit in the lead-surrogate range 0xD800..0xDBFF is combined with the
 * following unit into a supplementary code point (two units consumed);
 * the second unit is not validated. Any other unit is returned as-is
 * (one unit consumed).
 */
static uint32_t decode_utf16(const unsigned short **code_ptr)
{
	const unsigned short *code = *code_ptr;

	/* Lead surrogates end at 0xDBFF; 0xDC00 is already a trail surrogate. */
	if (code[0] < 0xd800 || code[0] > 0xdbff) {
		*code_ptr += 1;
		return (uint32_t)code[0];
	} else {
		*code_ptr += 2;
		return 0x10000 + ((uint32_t)code[1] - 0xdc00) +
			(((uint32_t)code[0] - 0xd800) << 10);
	}
}
| 195 | |
| 196 const char *ucdn_get_unicode_version(void) | |
| 197 { | |
| 198 return UNIDATA_VERSION; | |
| 199 } | |
| 200 | |
| 201 int ucdn_get_combining_class(uint32_t code) | |
| 202 { | |
| 203 return get_ucd_record(code)->combining; | |
| 204 } | |
| 205 | |
| 206 int ucdn_get_east_asian_width(uint32_t code) | |
| 207 { | |
| 208 return get_ucd_record(code)->east_asian_width; | |
| 209 } | |
| 210 | |
| 211 int ucdn_get_general_category(uint32_t code) | |
| 212 { | |
| 213 return get_ucd_record(code)->category; | |
| 214 } | |
| 215 | |
| 216 int ucdn_get_bidi_class(uint32_t code) | |
| 217 { | |
| 218 return get_ucd_record(code)->bidi_class; | |
| 219 } | |
| 220 | |
/*
 * Return 1 if ucdn_mirror() maps 'code' to a different code point,
 * 0 otherwise.
 */
int ucdn_get_mirrored(uint32_t code)
{
	uint32_t counterpart = ucdn_mirror(code);

	return counterpart != code;
}
| 225 | |
| 226 int ucdn_get_script(uint32_t code) | |
| 227 { | |
| 228 return get_ucd_record(code)->script; | |
| 229 } | |
| 230 | |
| 231 int ucdn_get_linebreak_class(uint32_t code) | |
| 232 { | |
| 233 return get_ucd_record(code)->linebreak_class; | |
| 234 } | |
| 235 | |
| 236 int ucdn_get_resolved_linebreak_class(uint32_t code) | |
| 237 { | |
| 238 const UCDRecord *record = get_ucd_record(code); | |
| 239 | |
| 240 switch (record->linebreak_class) | |
| 241 { | |
| 242 case UCDN_LINEBREAK_CLASS_AI: | |
| 243 case UCDN_LINEBREAK_CLASS_SG: | |
| 244 case UCDN_LINEBREAK_CLASS_XX: | |
| 245 return UCDN_LINEBREAK_CLASS_AL; | |
| 246 | |
| 247 case UCDN_LINEBREAK_CLASS_SA: | |
| 248 if (record->category == UCDN_GENERAL_CATEGORY_MC || | |
| 249 record->category == UCDN_GENERAL_CATEGORY_MN) | |
| 250 return UCDN_LINEBREAK_CLASS_CM; | |
| 251 return UCDN_LINEBREAK_CLASS_AL; | |
| 252 | |
| 253 case UCDN_LINEBREAK_CLASS_CJ: | |
| 254 return UCDN_LINEBREAK_CLASS_NS; | |
| 255 | |
| 256 case UCDN_LINEBREAK_CLASS_CB: | |
| 257 return UCDN_LINEBREAK_CLASS_B2; | |
| 258 | |
| 259 case UCDN_LINEBREAK_CLASS_NL: | |
| 260 return UCDN_LINEBREAK_CLASS_BK; | |
| 261 | |
| 262 default: | |
| 263 return record->linebreak_class; | |
| 264 } | |
| 265 } | |
| 266 | |
| 267 uint32_t ucdn_mirror(uint32_t code) | |
| 268 { | |
| 269 MirrorPair mp = {0}; | |
| 270 MirrorPair *res; | |
| 271 | |
| 272 mp.from = code; | |
| 273 res = (MirrorPair *) bsearch(&mp, mirror_pairs, BIDI_MIRROR_LEN, | |
| 274 sizeof(MirrorPair), compare_mp); | |
| 275 | |
| 276 if (res == NULL) | |
| 277 return code; | |
| 278 else | |
| 279 return res->to; | |
| 280 } | |
| 281 | |
| 282 uint32_t ucdn_paired_bracket(uint32_t code) | |
| 283 { | |
| 284 BracketPair *res = search_bp(code); | |
| 285 if (res == NULL) | |
| 286 return code; | |
| 287 else | |
| 288 return res->to; | |
| 289 } | |
| 290 | |
| 291 int ucdn_paired_bracket_type(uint32_t code) | |
| 292 { | |
| 293 BracketPair *res = search_bp(code); | |
| 294 if (res == NULL) | |
| 295 return UCDN_BIDI_PAIRED_BRACKET_TYPE_NONE; | |
| 296 else | |
| 297 return res->type; | |
| 298 } | |
| 299 | |
/*
 * Decompose 'code' into a pair of code points.
 *
 * Hangul syllables are handled algorithmically first. Otherwise the
 * decomposition entry is consulted: the high byte of its first unit is
 * the length, and a non-zero low byte makes this function report "no
 * decomposition" (presumably it tags a compatibility mapping -- confirm
 * against the table generator).
 *
 * Returns 1 and fills *a and *b (*b is 0 when the decomposition has a
 * single code point), or 0 if there is nothing to decompose.
 */
int ucdn_decompose(uint32_t code, uint32_t *a, uint32_t *b)
{
	const unsigned short *entry;
	int count;

	if (hangul_pair_decompose(code, a, b))
		return 1;

	entry = get_decomp_record(code);
	count = entry[0] >> 8;

	if (count == 0 || (entry[0] & 0xff) != 0)
		return 0;

	/* skip the header unit; the UTF-16 encoded payload follows */
	entry++;
	*a = decode_utf16(&entry);
	*b = (count > 1) ? decode_utf16(&entry) : 0;

	return 1;
}
| 323 | |
| 324 int ucdn_compose(uint32_t *code, uint32_t a, uint32_t b) | |
| 325 { | |
| 326 int l, r, index, indexi, offset; | |
| 327 | |
| 328 if (hangul_pair_compose(code, a, b)) | |
| 329 return 1; | |
| 330 | |
| 331 l = get_comp_index(a, nfc_first, sizeof(nfc_first) / sizeof(Reindex)); | |
| 332 r = get_comp_index(b, nfc_last, sizeof(nfc_last) / sizeof(Reindex)); | |
| 333 | |
| 334 if (l < 0 || r < 0) | |
| 335 return 0; | |
| 336 | |
| 337 indexi = l * TOTAL_LAST + r; | |
| 338 index = comp_index0[indexi >> (COMP_SHIFT1+COMP_SHIFT2)] << COMP_SHIFT1; | |
| 339 offset = (indexi >> COMP_SHIFT2) & ((1<<COMP_SHIFT1) - 1); | |
| 340 index = comp_index1[index + offset] << COMP_SHIFT2; | |
| 341 offset = indexi & ((1<<COMP_SHIFT2) - 1); | |
| 342 *code = comp_data[index + offset]; | |
| 343 | |
| 344 return *code != 0; | |
| 345 } | |
| 346 | |
/*
 * Write the full decomposition stored for 'code' into decomposed[].
 *
 * The length is taken from the high byte of the entry's first unit; the
 * caller must provide room for that many code points. Unlike
 * ucdn_decompose(), the low byte of the header is not checked and Hangul
 * is not handled here.
 *
 * Returns the number of code points written, or 0 if there is no
 * decomposition.
 */
int ucdn_compat_decompose(uint32_t code, uint32_t *decomposed)
{
	const unsigned short *entry = get_decomp_record(code);
	uint32_t *out = decomposed;
	int count = entry[0] >> 8;
	int remaining;

	if (count == 0)
		return 0;

	/* skip the header unit; the UTF-16 encoded payload follows */
	entry++;
	for (remaining = count; remaining > 0; remaining--)
		*out++ = decode_utf16(&entry);

	return count;
}
