comparison mupdf-source/source/fitz/encodings.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 // Copyright (C) 2004-2021 Artifex Software, Inc.
2 //
3 // This file is part of MuPDF.
4 //
5 // MuPDF is free software: you can redistribute it and/or modify it under the
6 // terms of the GNU Affero General Public License as published by the Free
7 // Software Foundation, either version 3 of the License, or (at your option)
8 // any later version.
9 //
10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13 // details.
14 //
15 // You should have received a copy of the GNU Affero General Public License
16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17 //
18 // Alternative licensing terms are available from the licensor.
19 // For commercial licensing, see <https://www.artifex.com/> or contact
20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21 // CA 94129, USA, for further information.
22
23 #include "mupdf/fitz.h"
24 #include "mupdf/pdf.h"
25
#include <errno.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
28
29 #include "encodings.h"
30 #include "glyphlist.h"
31 #include "smallcaps.h"
32
/*
	FROM_UNICODE(ENC) expands to the complete body of a function that
	takes a single `int u` argument.

	Values below 128 are returned unchanged. Anything else is looked up
	by binary search in the ENC##_from_unicode table (which must
	therefore be sorted ascending by its `u` member); on a hit the
	entry's `c` member is returned, otherwise -1.
*/
#define FROM_UNICODE(ENC) \
	int lo = 0; \
	int hi = nelem(ENC##_from_unicode) - 1; \
	if (u < 128) \
		return u; \
	while (lo <= hi) \
	{ \
		int mid = (lo + hi) >> 1; \
		if (u == ENC##_from_unicode[mid].u) \
			return ENC##_from_unicode[mid].c; \
		if (u < ENC##_from_unicode[mid].u) \
			hi = mid - 1; \
		else \
			lo = mid + 1; \
	} \
	return -1;

50 int fz_iso8859_1_from_unicode(int u) { FROM_UNICODE(iso8859_1) }
51 int fz_iso8859_7_from_unicode(int u) { FROM_UNICODE(iso8859_7) }
52 int fz_koi8u_from_unicode(int u) { FROM_UNICODE(koi8u) }
53 int fz_windows_1250_from_unicode(int u) { FROM_UNICODE(windows_1250) }
54 int fz_windows_1251_from_unicode(int u) { FROM_UNICODE(windows_1251) }
55 int fz_windows_1252_from_unicode(int u) { FROM_UNICODE(windows_1252) }
56
57 int
58 fz_unicode_from_glyph_name_strict(const char *name)
59 {
60 int l = 0;
61 int r = nelem(single_name_list) - 1;
62
63 while (l <= r)
64 {
65 int m = (l + r) >> 1;
66 int c = strcmp(name, single_name_list[m]);
67 if (c < 0)
68 r = m - 1;
69 else if (c > 0)
70 l = m + 1;
71 else
72 return single_code_list[m];
73 }
74 return 0;
75 }
76
/*
	Parse the whole of string p as an integer in the given base.

	p: NUL-terminated text to parse.
	base: numeric base handed to strtol (10 and 16 are used in this file).

	Returns the parsed value, or 0 when:
	- anything other than the terminating NUL follows the number, or
	- the value does not fit in an int.

	The second check matters because strtol yields a long; narrowing it
	blindly would let an over-long hex string alias a small, valid-looking
	code (e.g. 0x100000041 becoming 0x41 where long is 64 bits wide).
*/
static int
read_num(const char *p, int base)
{
	char *end;
	long v;

	errno = 0;
	v = strtol(p, &end, base);

	/* Trailing garbage: the string as a whole is not a number. */
	if (*end != 0)
		return 0;

	/* Out of range for long (ERANGE) or for int: treat as not a number. */
	if (errno == ERANGE || v < INT_MIN || v > INT_MAX)
		return 0;

	return (int)v;
}
86
87 int
88 fz_unicode_from_glyph_name(const char *name)
89 {
90 char buf[64];
91 char *p;
92 int l = 0;
93 int r = nelem(single_name_list) - 1;
94 int code = 0;
95
96 fz_strlcpy(buf, name, sizeof buf);
97
98 /* kill anything after first period and underscore */
99 p = strchr(buf, '.');
100 if (p) p[0] = 0;
101 p = strchr(buf, '_');
102 if (p)
103 {
104 /* Hacky tests for alternative ligature names */
105 if (buf[0] == 'f')
106 {
107 if (!strcmp(buf, "f_f"))
108 strcpy(buf, "ff");
109 else if (!strcmp(buf, "f_f_i"))
110 strcpy(buf, "ffi");
111 else if (!strcmp(buf, "f_f_l"))
112 strcpy(buf, "ffl");
113 else if (!strcmp(buf, "f_i"))
114 strcpy(buf, "fi");
115 else if (!strcmp(buf, "f_l"))
116 strcpy(buf, "fl");
117 else
118 p[0] = 0;
119 }
120 else
121 p[0] = 0;
122 }
123
124 while (l <= r)
125 {
126 int m = (l + r) >> 1;
127 int c = strcmp(buf, single_name_list[m]);
128 if (c < 0)
129 r = m - 1;
130 else if (c > 0)
131 l = m + 1;
132 else
133 return single_code_list[m];
134 }
135
136 if (buf[0] == 'u' && buf[1] == 'n' && buf[2] == 'i' && strlen(buf) == 7)
137 code = read_num(buf+3, 16);
138 else if (buf[0] == 'u')
139 code = read_num(buf+1, 16);
140 else if (buf[0] == 'a' && buf[1] != 0 && buf[2] != 0)
141 code = read_num(buf+1, 10);
142 else
143 code = read_num(buf, 10);
144
145 return (code > 0 && code <= 0x10ffff) ? code : FZ_REPLACEMENT_CHARACTER;
146 }
147
148 static const char *empty_dup_list[] = { 0 };
149
150 const char **
151 fz_duplicate_glyph_names_from_unicode(int ucs)
152 {
153 int l = 0;
154 int r = nelem(agl_dup_offsets) / 2 - 1;
155 while (l <= r)
156 {
157 int m = (l + r) >> 1;
158 if (ucs < agl_dup_offsets[m << 1])
159 r = m - 1;
160 else if (ucs > agl_dup_offsets[m << 1])
161 l = m + 1;
162 else
163 return agl_dup_names + agl_dup_offsets[(m << 1) + 1];
164 }
165 return empty_dup_list;
166 }
167
168 const char *
169 fz_glyph_name_from_unicode_sc(int u)
170 {
171 int l = 0;
172 int r = nelem(glyph_name_from_unicode_sc) / 2 - 1;
173 while (l <= r)
174 {
175 int m = (l + r) >> 1;
176 if (u < glyph_name_from_unicode_sc[m].u)
177 r = m - 1;
178 else if (u > glyph_name_from_unicode_sc[m].u)
179 l = m + 1;
180 else
181 return glyph_name_from_unicode_sc[m].n;
182 }
183 return NULL;
184 }