Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/source/fitz/ucdn.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mupdf-source/source/fitz/ucdn.c Mon Sep 15 11:43:07 2025 +0200 @@ -0,0 +1,361 @@ +/* + * Copyright (C) 2012 Grigori Goronzy <greg@kinoho.net> + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include "mupdf/fitz.h" +#include "mupdf/ucdn.h" + +#include <stdio.h> +#include <stdlib.h> + +typedef struct { + unsigned char category; + unsigned char combining; + unsigned char bidi_class; + unsigned char east_asian_width; + unsigned char script; + unsigned char linebreak_class; +} UCDRecord; + +typedef struct { + unsigned short from, to; +} MirrorPair; + +typedef struct { + unsigned short from, to; + unsigned char type; +} BracketPair; + +typedef struct { + unsigned int start; + short count, index; +} Reindex; + +#include "ucdn_db.h" + +/* constants required for Hangul (de)composition */ +#define SBASE 0xAC00 +#define LBASE 0x1100 +#define VBASE 0x1161 +#define TBASE 0x11A7 +#define SCOUNT 11172 +#define LCOUNT 19 +#define VCOUNT 21 +#define TCOUNT 28 +#define NCOUNT (VCOUNT * TCOUNT) + +static const UCDRecord *get_ucd_record(uint32_t code) +{ + int index, offset; + + if (code >= 0x110000) + index = 0; + else { + index = index0[code >> (SHIFT1+SHIFT2)] << SHIFT1; + offset = (code >> SHIFT2) & ((1<<SHIFT1) - 1); + index = index1[index + offset] << SHIFT2; + offset = code & ((1<<SHIFT2) - 1); + index = index2[index + offset]; + } + + return &ucd_records[index]; +} + +static const unsigned short *get_decomp_record(uint32_t code) +{ + int index, offset; + + if (code >= 0x110000) + index = 0; + else { + index = decomp_index0[code >> (DECOMP_SHIFT1+DECOMP_SHIFT2)] + << DECOMP_SHIFT1; + offset = (code >> DECOMP_SHIFT2) & ((1<<DECOMP_SHIFT1) - 1); + index = decomp_index1[index + offset] << DECOMP_SHIFT2; + offset = code & ((1<<DECOMP_SHIFT2) - 1); + index = decomp_index2[index + offset]; + } + + return &decomp_data[index]; +} + +static int compare_reindex(const void *a, const void *b) +{ + Reindex *ra = (Reindex *)a; + Reindex *rb = (Reindex *)b; + + if (ra->start < rb->start) + return -1; + else if (ra->start > (rb->start + rb->count)) + return 1; + else + return 0; +} + +static int get_comp_index(uint32_t code, const Reindex *idx, size_t len) +{ + Reindex *res; + Reindex r = {0, 0, 0}; + r.start = code; + res = (Reindex *) bsearch(&r, idx, len, sizeof(Reindex), compare_reindex); + + if (res != NULL) + return res->index + (code - res->start); + else + return -1; +} + +static int compare_mp(const void *a, const void *b) +{ + MirrorPair *mpa = (MirrorPair *)a; + MirrorPair *mpb = (MirrorPair *)b; + return mpa->from - mpb->from; +} + +static int compare_bp(const void *a, const void *b) +{ + BracketPair *bpa = (BracketPair *)a; + BracketPair *bpb = (BracketPair *)b; + return bpa->from - bpb->from; +} + +static BracketPair *search_bp(uint32_t code) +{ + BracketPair bp = {0,0,2}; + BracketPair *res; + + bp.from = code; + res = (BracketPair *) bsearch(&bp, bracket_pairs, BIDI_BRACKET_LEN, + sizeof(BracketPair), compare_bp); + return res; +} + +static int hangul_pair_decompose(uint32_t code, uint32_t *a, uint32_t *b) +{ + int si = code - SBASE; + + if (si < 0 || si >= SCOUNT) + return 0; + + if (si % TCOUNT) { + /* LV,T */ + *a = SBASE + (si / TCOUNT) * TCOUNT; + *b = TBASE + (si % TCOUNT); + return 3; + } else { + /* L,V */ + *a = LBASE + (si / NCOUNT); + *b = VBASE + (si % NCOUNT) / TCOUNT; + return 2; + } +} + +static int hangul_pair_compose(uint32_t *code, uint32_t a, uint32_t b) +{ + if (a >= SBASE && a < (SBASE + SCOUNT) && b >= TBASE && b < (TBASE + TCOUNT)) { + /* LV,T */ + *code = a + (b - TBASE); + return 3; + } else if (a >= LBASE && a < (LBASE + LCOUNT) && b >= VBASE && b < (VBASE + VCOUNT)) { + /* L,V */ + int li = a - LBASE; + int vi = b - VBASE; + *code = SBASE + li * NCOUNT + vi * TCOUNT; + return 2; + } else { + return 0; + } +} + +static uint32_t decode_utf16(const unsigned short **code_ptr) +{ + const unsigned short *code = *code_ptr; + + if (code[0] < 0xd800 || code[0] > 0xdc00) { + *code_ptr += 1; + return (uint32_t)code[0]; + } else { + *code_ptr += 2; + return 0x10000 + ((uint32_t)code[1] - 0xdc00) + + (((uint32_t)code[0] - 0xd800) << 10); + } +} + +const char *ucdn_get_unicode_version(void) +{ + return UNIDATA_VERSION; +} + +int ucdn_get_combining_class(uint32_t code) +{ + return get_ucd_record(code)->combining; +} + +int ucdn_get_east_asian_width(uint32_t code) +{ + return get_ucd_record(code)->east_asian_width; +} + +int ucdn_get_general_category(uint32_t code) +{ + return get_ucd_record(code)->category; +} + +int ucdn_get_bidi_class(uint32_t code) +{ + return get_ucd_record(code)->bidi_class; +} + +int ucdn_get_mirrored(uint32_t code) +{ + return ucdn_mirror(code) != code; +} + +int ucdn_get_script(uint32_t code) +{ + return get_ucd_record(code)->script; +} + +int ucdn_get_linebreak_class(uint32_t code) +{ + return get_ucd_record(code)->linebreak_class; +} + +int ucdn_get_resolved_linebreak_class(uint32_t code) +{ + const UCDRecord *record = get_ucd_record(code); + + switch (record->linebreak_class) + { + case UCDN_LINEBREAK_CLASS_AI: + case UCDN_LINEBREAK_CLASS_SG: + case UCDN_LINEBREAK_CLASS_XX: + return UCDN_LINEBREAK_CLASS_AL; + + case UCDN_LINEBREAK_CLASS_SA: + if (record->category == UCDN_GENERAL_CATEGORY_MC || + record->category == UCDN_GENERAL_CATEGORY_MN) + return UCDN_LINEBREAK_CLASS_CM; + return UCDN_LINEBREAK_CLASS_AL; + + case UCDN_LINEBREAK_CLASS_CJ: + return UCDN_LINEBREAK_CLASS_NS; + + case UCDN_LINEBREAK_CLASS_CB: + return UCDN_LINEBREAK_CLASS_B2; + + case UCDN_LINEBREAK_CLASS_NL: + return UCDN_LINEBREAK_CLASS_BK; + + default: + return record->linebreak_class; + } +} + +uint32_t ucdn_mirror(uint32_t code) +{ + MirrorPair mp = {0}; + MirrorPair *res; + + mp.from = code; + res = (MirrorPair *) bsearch(&mp, mirror_pairs, BIDI_MIRROR_LEN, + sizeof(MirrorPair), compare_mp); + + if (res == NULL) + return code; + else + return res->to; +} + +uint32_t ucdn_paired_bracket(uint32_t code) +{ + BracketPair *res = search_bp(code); + if (res == NULL) + return code; + else + return res->to; +} + +int ucdn_paired_bracket_type(uint32_t code) +{ + BracketPair *res = search_bp(code); + if (res == NULL) + return UCDN_BIDI_PAIRED_BRACKET_TYPE_NONE; + else + return res->type; +} + +int ucdn_decompose(uint32_t code, uint32_t *a, uint32_t *b) +{ + const unsigned short *rec; + int len; + + if (hangul_pair_decompose(code, a, b)) + return 1; + + rec = get_decomp_record(code); + len = rec[0] >> 8; + + if ((rec[0] & 0xff) != 0 || len == 0) + return 0; + + rec++; + *a = decode_utf16(&rec); + if (len > 1) + *b = decode_utf16(&rec); + else + *b = 0; + + return 1; +} + +int ucdn_compose(uint32_t *code, uint32_t a, uint32_t b) +{ + int l, r, index, indexi, offset; + + if (hangul_pair_compose(code, a, b)) + return 1; + + l = get_comp_index(a, nfc_first, sizeof(nfc_first) / sizeof(Reindex)); + r = get_comp_index(b, nfc_last, sizeof(nfc_last) / sizeof(Reindex)); + + if (l < 0 || r < 0) + return 0; + + indexi = l * TOTAL_LAST + r; + index = comp_index0[indexi >> (COMP_SHIFT1+COMP_SHIFT2)] << COMP_SHIFT1; + offset = (indexi >> COMP_SHIFT2) & ((1<<COMP_SHIFT1) - 1); + index = comp_index1[index + offset] << COMP_SHIFT2; + offset = indexi & ((1<<COMP_SHIFT2) - 1); + *code = comp_data[index + offset]; + + return *code != 0; +} + +int ucdn_compat_decompose(uint32_t code, uint32_t *decomposed) +{ + int i, len; + const unsigned short *rec = get_decomp_record(code); + len = rec[0] >> 8; + + if (len == 0) + return 0; + + rec++; + for (i = 0; i < len; i++) + decomposed[i] = decode_utf16(&rec); + + return len; +}
