Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/source/fitz/encodings.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mupdf-source/source/fitz/encodings.c Mon Sep 15 11:43:07 2025 +0200 @@ -0,0 +1,184 @@ +// Copyright (C) 2004-2021 Artifex Software, Inc. +// +// This file is part of MuPDF. +// +// MuPDF is free software: you can redistribute it and/or modify it under the +// terms of the GNU Affero General Public License as published by the Free +// Software Foundation, either version 3 of the License, or (at your option) +// any later version. +// +// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more +// details. +// +// You should have received a copy of the GNU Affero General Public License +// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> +// +// Alternative licensing terms are available from the licensor. +// For commercial licensing, see <https://www.artifex.com/> or contact +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. + +#include "mupdf/fitz.h" +#include "mupdf/pdf.h" + +#include <string.h> +#include <stdlib.h> + +#include "encodings.h" +#include "glyphlist.h" +#include "smallcaps.h" + +#define FROM_UNICODE(ENC) \ + int l = 0; \ + int r = nelem(ENC##_from_unicode) - 1; \ + if (u < 128) \ + return u; \ + while (l <= r) \ + { \ + int m = (l + r) >> 1; \ + if (u < ENC##_from_unicode[m].u) \ + r = m - 1; \ + else if (u > ENC##_from_unicode[m].u) \ + l = m + 1; \ + else \ + return ENC##_from_unicode[m].c; \ + } \ + return -1; \ + +int fz_iso8859_1_from_unicode(int u) { FROM_UNICODE(iso8859_1) } +int fz_iso8859_7_from_unicode(int u) { FROM_UNICODE(iso8859_7) } +int fz_koi8u_from_unicode(int u) { FROM_UNICODE(koi8u) } +int fz_windows_1250_from_unicode(int u) { FROM_UNICODE(windows_1250) } +int fz_windows_1251_from_unicode(int u) { FROM_UNICODE(windows_1251) } +int fz_windows_1252_from_unicode(int u) { FROM_UNICODE(windows_1252) } + +int +fz_unicode_from_glyph_name_strict(const char *name) +{ + int l = 0; + int r = nelem(single_name_list) - 1; + + while (l <= r) + { + int m = (l + r) >> 1; + int c = strcmp(name, single_name_list[m]); + if (c < 0) + r = m - 1; + else if (c > 0) + l = m + 1; + else + return single_code_list[m]; + } + return 0; +} + +static int +read_num(const char *p, int base) +{ + char *e; + int v = strtol(p, &e, base); + if (*e != 0) + return 0; + return v; +} + +int +fz_unicode_from_glyph_name(const char *name) +{ + char buf[64]; + char *p; + int l = 0; + int r = nelem(single_name_list) - 1; + int code = 0; + + fz_strlcpy(buf, name, sizeof buf); + + /* kill anything after first period and underscore */ + p = strchr(buf, '.'); + if (p) p[0] = 0; + p = strchr(buf, '_'); + if (p) + { + /* Hacky tests for alternative ligature names */ + if (buf[0] == 'f') + { + if (!strcmp(buf, "f_f")) + strcpy(buf, "ff"); + else if (!strcmp(buf, "f_f_i")) + strcpy(buf, "ffi"); + else if (!strcmp(buf, "f_f_l")) + strcpy(buf, "ffl"); + else if (!strcmp(buf, "f_i")) + strcpy(buf, "fi"); + else if (!strcmp(buf, "f_l")) + strcpy(buf, "fl"); + else + p[0] = 0; + } + else + p[0] = 0; + } + + while (l <= r) + { + int m = (l + r) >> 1; + int c = strcmp(buf, single_name_list[m]); + if (c < 0) + r = m - 1; + else if (c > 0) + l = m + 1; + else + return single_code_list[m]; + } + + if (buf[0] == 'u' && buf[1] == 'n' && buf[2] == 'i' && strlen(buf) == 7) + code = read_num(buf+3, 16); + else if (buf[0] == 'u') + code = read_num(buf+1, 16); + else if (buf[0] == 'a' && buf[1] != 0 && buf[2] != 0) + code = read_num(buf+1, 10); + else + code = read_num(buf, 10); + + return (code > 0 && code <= 0x10ffff) ? code : FZ_REPLACEMENT_CHARACTER; +} + +static const char *empty_dup_list[] = { 0 }; + +const char ** +fz_duplicate_glyph_names_from_unicode(int ucs) +{ + int l = 0; + int r = nelem(agl_dup_offsets) / 2 - 1; + while (l <= r) + { + int m = (l + r) >> 1; + if (ucs < agl_dup_offsets[m << 1]) + r = m - 1; + else if (ucs > agl_dup_offsets[m << 1]) + l = m + 1; + else + return agl_dup_names + agl_dup_offsets[(m << 1) + 1]; + } + return empty_dup_list; +} + +const char * +fz_glyph_name_from_unicode_sc(int u) +{ + int l = 0; + int r = nelem(glyph_name_from_unicode_sc) / 2 - 1; + while (l <= r) + { + int m = (l + r) >> 1; + if (u < glyph_name_from_unicode_sc[m].u) + r = m - 1; + else if (u > glyph_name_from_unicode_sc[m].u) + l = m + 1; + else + return glyph_name_from_unicode_sc[m].n; + } + return NULL; +}
