Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/source/fitz/encodings.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 // Copyright (C) 2004-2021 Artifex Software, Inc. | |
| 2 // | |
| 3 // This file is part of MuPDF. | |
| 4 // | |
| 5 // MuPDF is free software: you can redistribute it and/or modify it under the | |
| 6 // terms of the GNU Affero General Public License as published by the Free | |
| 7 // Software Foundation, either version 3 of the License, or (at your option) | |
| 8 // any later version. | |
| 9 // | |
| 10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY | |
| 11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
| 12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more | |
| 13 // details. | |
| 14 // | |
| 15 // You should have received a copy of the GNU Affero General Public License | |
| 16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> | |
| 17 // | |
| 18 // Alternative licensing terms are available from the licensor. | |
| 19 // For commercial licensing, see <https://www.artifex.com/> or contact | |
| 20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, | |
| 21 // CA 94129, USA, for further information. | |
| 22 | |
#include "mupdf/fitz.h"
#include "mupdf/pdf.h"

#include <limits.h>
#include <stdlib.h>
#include <string.h>

#include "encodings.h"
#include "glyphlist.h"
#include "smallcaps.h"
| 32 | |
| 33 #define FROM_UNICODE(ENC) \ | |
| 34 int l = 0; \ | |
| 35 int r = nelem(ENC##_from_unicode) - 1; \ | |
| 36 if (u < 128) \ | |
| 37 return u; \ | |
| 38 while (l <= r) \ | |
| 39 { \ | |
| 40 int m = (l + r) >> 1; \ | |
| 41 if (u < ENC##_from_unicode[m].u) \ | |
| 42 r = m - 1; \ | |
| 43 else if (u > ENC##_from_unicode[m].u) \ | |
| 44 l = m + 1; \ | |
| 45 else \ | |
| 46 return ENC##_from_unicode[m].c; \ | |
| 47 } \ | |
| 48 return -1; \ | |
| 49 | |
| 50 int fz_iso8859_1_from_unicode(int u) { FROM_UNICODE(iso8859_1) } | |
| 51 int fz_iso8859_7_from_unicode(int u) { FROM_UNICODE(iso8859_7) } | |
| 52 int fz_koi8u_from_unicode(int u) { FROM_UNICODE(koi8u) } | |
| 53 int fz_windows_1250_from_unicode(int u) { FROM_UNICODE(windows_1250) } | |
| 54 int fz_windows_1251_from_unicode(int u) { FROM_UNICODE(windows_1251) } | |
| 55 int fz_windows_1252_from_unicode(int u) { FROM_UNICODE(windows_1252) } | |
| 56 | |
| 57 int | |
| 58 fz_unicode_from_glyph_name_strict(const char *name) | |
| 59 { | |
| 60 int l = 0; | |
| 61 int r = nelem(single_name_list) - 1; | |
| 62 | |
| 63 while (l <= r) | |
| 64 { | |
| 65 int m = (l + r) >> 1; | |
| 66 int c = strcmp(name, single_name_list[m]); | |
| 67 if (c < 0) | |
| 68 r = m - 1; | |
| 69 else if (c > 0) | |
| 70 l = m + 1; | |
| 71 else | |
| 72 return single_code_list[m]; | |
| 73 } | |
| 74 return 0; | |
| 75 } | |
| 76 | |
/*
	Parse the whole of string p as an integer in the given base.

	Returns the parsed value, or 0 when any character is left over
	after the number, or when the value cannot be represented as an
	int. An empty string parses as 0.
*/
static int
read_num(const char *p, int base)
{
	char *end;
	long v = strtol(p, &end, base);

	/* The entire string must have been consumed. */
	if (*end != 0)
		return 0;

	/*
		strtol produces a long. Where long is wider than int, narrowing
		without a check would silently drop the high bits, so that e.g.
		hex "100000041" would come back as 0x41. Treat such values as
		unparseable instead.
	*/
	if (v < INT_MIN || v > INT_MAX)
		return 0;

	return (int)v;
}
| 86 | |
| 87 int | |
| 88 fz_unicode_from_glyph_name(const char *name) | |
| 89 { | |
| 90 char buf[64]; | |
| 91 char *p; | |
| 92 int l = 0; | |
| 93 int r = nelem(single_name_list) - 1; | |
| 94 int code = 0; | |
| 95 | |
| 96 fz_strlcpy(buf, name, sizeof buf); | |
| 97 | |
| 98 /* kill anything after first period and underscore */ | |
| 99 p = strchr(buf, '.'); | |
| 100 if (p) p[0] = 0; | |
| 101 p = strchr(buf, '_'); | |
| 102 if (p) | |
| 103 { | |
| 104 /* Hacky tests for alternative ligature names */ | |
| 105 if (buf[0] == 'f') | |
| 106 { | |
| 107 if (!strcmp(buf, "f_f")) | |
| 108 strcpy(buf, "ff"); | |
| 109 else if (!strcmp(buf, "f_f_i")) | |
| 110 strcpy(buf, "ffi"); | |
| 111 else if (!strcmp(buf, "f_f_l")) | |
| 112 strcpy(buf, "ffl"); | |
| 113 else if (!strcmp(buf, "f_i")) | |
| 114 strcpy(buf, "fi"); | |
| 115 else if (!strcmp(buf, "f_l")) | |
| 116 strcpy(buf, "fl"); | |
| 117 else | |
| 118 p[0] = 0; | |
| 119 } | |
| 120 else | |
| 121 p[0] = 0; | |
| 122 } | |
| 123 | |
| 124 while (l <= r) | |
| 125 { | |
| 126 int m = (l + r) >> 1; | |
| 127 int c = strcmp(buf, single_name_list[m]); | |
| 128 if (c < 0) | |
| 129 r = m - 1; | |
| 130 else if (c > 0) | |
| 131 l = m + 1; | |
| 132 else | |
| 133 return single_code_list[m]; | |
| 134 } | |
| 135 | |
| 136 if (buf[0] == 'u' && buf[1] == 'n' && buf[2] == 'i' && strlen(buf) == 7) | |
| 137 code = read_num(buf+3, 16); | |
| 138 else if (buf[0] == 'u') | |
| 139 code = read_num(buf+1, 16); | |
| 140 else if (buf[0] == 'a' && buf[1] != 0 && buf[2] != 0) | |
| 141 code = read_num(buf+1, 10); | |
| 142 else | |
| 143 code = read_num(buf, 10); | |
| 144 | |
| 145 return (code > 0 && code <= 0x10ffff) ? code : FZ_REPLACEMENT_CHARACTER; | |
| 146 } | |
| 147 | |
| 148 static const char *empty_dup_list[] = { 0 }; | |
| 149 | |
| 150 const char ** | |
| 151 fz_duplicate_glyph_names_from_unicode(int ucs) | |
| 152 { | |
| 153 int l = 0; | |
| 154 int r = nelem(agl_dup_offsets) / 2 - 1; | |
| 155 while (l <= r) | |
| 156 { | |
| 157 int m = (l + r) >> 1; | |
| 158 if (ucs < agl_dup_offsets[m << 1]) | |
| 159 r = m - 1; | |
| 160 else if (ucs > agl_dup_offsets[m << 1]) | |
| 161 l = m + 1; | |
| 162 else | |
| 163 return agl_dup_names + agl_dup_offsets[(m << 1) + 1]; | |
| 164 } | |
| 165 return empty_dup_list; | |
| 166 } | |
| 167 | |
| 168 const char * | |
| 169 fz_glyph_name_from_unicode_sc(int u) | |
| 170 { | |
| 171 int l = 0; | |
| 172 int r = nelem(glyph_name_from_unicode_sc) / 2 - 1; | |
| 173 while (l <= r) | |
| 174 { | |
| 175 int m = (l + r) >> 1; | |
| 176 if (u < glyph_name_from_unicode_sc[m].u) | |
| 177 r = m - 1; | |
| 178 else if (u > glyph_name_from_unicode_sc[m].u) | |
| 179 l = m + 1; | |
| 180 else | |
| 181 return glyph_name_from_unicode_sc[m].n; | |
| 182 } | |
| 183 return NULL; | |
| 184 } |
