comparison mupdf-source/thirdparty/zint/backend/tools/gen_eci_sb_h.php @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 <?php
2 /* Generate ECI single-byte tables & routines from unicode.org mapping files */
3 /*
4 libzint - the open source barcode library
5 Copyright (C) 2022-2023 Robin Stuart <rstuart114@gmail.com>
6 */
7 /* SPDX-License-Identifier: BSD-3-Clause */
8 /*
9 * To create "backend/eci_sb.h" (from project root directory):
10 *
11 * php backend/tools/gen_eci_sb_h.php
12 */
13
// Identify this script: its file name prefixes error messages and its
// directory anchors the default output location.
$dirname = __DIR__;
$basename = basename(__FILE__);

// Command line: "-o <dir>" chooses where the generated file is put.
// "-d" is accepted with a value but is not read in this part of the script.
$opts = getopt('d:o:');
if (isset($opts['o'])) {
    $out_dirname = $opts['o'];
} else {
    $out_dirname = $dirname . '/..'; // Default: parent of this script's directory.
}

// Lines of the generated header are collected here.
$out = array();
21
// Opening text of the generated "eci_sb.h": provenance comment, BSD-3-Clause
// licence notice and the start of the Z_ECI_SB_H include guard. It is a nowdoc,
// so nothing inside it is interpolated.
22 $head = <<<'EOD'
23 /* eci_sb.h - Extended Channel Interpretations single-byte, generated by "backend/tools/gen_eci_sb_h.php"
24 from "https://unicode.org/Public/MAPPINGS/ISO8859/8859-*.TXT"
25 and "https://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP125*.TXT" */
26 /*
27 libzint - the open source barcode library
28 Copyright (C) 2021-2022 Robin Stuart <rstuart114@gmail.com>
29
30 Redistribution and use in source and binary forms, with or without
31 modification, are permitted provided that the following conditions
32 are met:
33
34 1. Redistributions of source code must retain the above copyright
35 notice, this list of conditions and the following disclaimer.
36 2. Redistributions in binary form must reproduce the above copyright
37 notice, this list of conditions and the following disclaimer in the
38 documentation and/or other materials provided with the distribution.
39 3. Neither the name of the project nor the names of its contributors
40 may be used to endorse or promote products derived from this software
41 without specific prior written permission.
42
43 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
44 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
45 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
46 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
47 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
48 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
49 OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
50 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
51 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
52 OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
53 SUCH DAMAGE.
54 */
55 /* SPDX-License-Identifier: BSD-3-Clause */
56
57 #ifndef Z_ECI_SB_H
58 #define Z_ECI_SB_H
59 EOD;
60
// Start the output line list from the header text, one array element per line
// (this replaces whatever $out held before).
61 $out = explode("\n", $head);
62
// C forward declaration of the shared ISO/IEC 8859 lookup routine that the
// generated per-part wrappers call. The nowdoc starts with an empty line, so a
// blank line separates it from the text already in $out.
63 $u_iso8859 = <<<'EOD'
64
65 /* Forward reference to base ISO/IEC 8859 routine - see "eci.c" */
66 static int u_iso8859(const unsigned int u, const unsigned short *tab_s, const unsigned short *tab_u,
67 const unsigned char *tab_sb, int e, unsigned char *dest);
68 EOD;
69
// Append the declaration to the output, one array element per line.
70 $out = array_merge($out, explode("\n", $u_iso8859));
71
// ECI number and description for each ISO/IEC 8859 part, indexed by part number.
// Indices 0, 1 and 12 hold empty placeholders so the array stays dense from 0 to 16.
$iso8859_comments = array(
    //     ECI   Description
    0  => array(),
    1  => array(),
    2  => array( '4', 'Latin alphabet No. 2 (Latin-2)' ),
    3  => array( '5', 'Latin alphabet No. 3 (Latin-3) (South European)' ),
    4  => array( '6', 'Latin alphabet No. 4 (Latin-4) (North European)' ),
    5  => array( '7', 'Latin/Cyrillic' ),
    6  => array( '8', 'Latin/Arabic' ),
    7  => array( '9', 'Latin/Greek' ),
    8  => array( '10', 'Latin/Hebrew' ),
    9  => array( '11', 'Latin alphabet No. 5 (Latin-5) (Latin/Turkish)' ),
    10 => array( '12', 'Latin alphabet No. 6 (Latin-6) (Nordic)' ),
    11 => array( '13', 'Latin/Thai' ),
    12 => array(),
    13 => array( '15', 'Latin alphabet No. 7 (Latin-7) (Baltic Rim)' ),
    14 => array( '16', 'Latin alphabet No. 8 (Latin-8) (Celtic)' ),
    15 => array( '17', 'Latin alphabet No. 9 (Latin-9)' ),
    16 => array( '18', 'Latin alphabet No. 10 (Latin-10) (South-Eastern European)' ),
);
91
92 // Read the 8859 files.
93
// Formats a list of integers as table rows of at most 8 entries. Every row
// begins with a single space and each value is rendered with $format.
// An empty list yields no rows.
$format_rows = function (array $values, $format) {
    $rows = array();
    foreach (array_chunk($values, 8) as $chunk) {
        $row = ' ';
        foreach ($chunk as $value) {
            $row .= sprintf($format, $value);
        }
        $rows[] = $row;
    }
    return $rows;
};

// For each ISO/IEC 8859 part 2..16 (12 is skipped): download the unicode.org
// mapping file, split the mappings into "straight-thru" bytes (byte value equals
// the codepoint) and table-driven ones, then append the C tables and the
// wrapper function for that part to $out.
$tot_8859 = 0; // Running size in bytes of all emitted ISO/IEC 8859 tables.
for ($part = 2; $part <= 16; $part++) {
    if ($part == 12) {
        continue;
    }

    $file = 'https://unicode.org/Public/MAPPINGS/ISO8859/' . '8859-' . $part . '.TXT';

    $get = file_get_contents($file);
    if ($get === false) {
        // Report the failure and stop with a non-zero status so that callers
        // (make, CI, shell scripts) can tell the header was not generated.
        error_log("$basename: ERROR: Could not read mapping file \"$file\"");
        exit(1);
    }

    // Parse the file.

    $sort = array(); // Unicode codepoints that need a table lookup.
    $sb = array();   // Single-byte values, parallel to $sort.
    $same = array(); // Bytes (0xA0-0xFF) whose codepoint equals the byte value.
    foreach (explode("\n", $get) as $line) {
        $line = trim($line);
        // Only lines starting with "0x" are mappings; an empty line fails this test too.
        if (strncmp($line, '0x', 2) !== 0 || strpos($line, "*** NO MAPPING ***") !== false) {
            continue;
        }
        $matches = array();
        if (!preg_match('/^0x([0-9A-F]{2})[ \t]+0x([0-9A-F]{4})[ \t].*$/', $line, $matches)) {
            continue;
        }
        $mb = hexdec($matches[1]);
        $unicode = hexdec($matches[2]);
        if ($unicode < 0xA0) {
            continue; // Codepoints below 0xA0 get no table entry.
        }
        if ($unicode <= 0xFF && $unicode == $mb) {
            $same[] = $mb;
        } else {
            $sort[] = $unicode;
            $sb[] = $mb;
        }
    }

    // Order by codepoint, keeping each single byte paired with its codepoint.
    array_multisort($sort, $sb);

    // One bit per byte 0xA0..0xFF: six 16-bit words, bit (byte & 0xF) of word ((byte - 0xA0) >> 4).
    $s = array( 0, 0, 0, 0, 0, 0 );
    foreach ($same as $byte) {
        $v = $byte - 0xA0;
        $s[$v >> 4] |= 1 << ($v & 0xF);
    }

    // Output.

    $name = 'iso8859_' . $part;

    $out[] = '';
    $out[] = '/* Tables for ISO/IEC 8859-' . $part . ' */';
    $out[] = 'static const unsigned short ' . $name . '_s[6] = { /* Straight-thru bit-flags */';
    $out = array_merge($out, $format_rows($s, ' 0x%04X,'));
    $out[] = '};';
    $tot_8859 += 6 * 2;

    $cnt = count($sort);
    $out[] = 'static const unsigned short ' . $name . '_u[' . $cnt . '] = { /* Unicode codepoints sorted */';
    $out = array_merge($out, $format_rows($sort, ' 0x%04X,'));
    $out[] = '};';
    $tot_8859 += $cnt * 2;

    $cnt = count($sb);
    $out[] = 'static const unsigned char ' . $name . '_sb[' . $cnt . '] = { /* Single-byte in Unicode order */';
    $out = array_merge($out, $format_rows($sb, ' 0x%02X,'));
    $out[] = '};';
    $tot_8859 += $cnt;

    // Wrapper function binding this part's tables to the shared routine.
    $out[] = '';
    $out[] = '/* ECI ' . $iso8859_comments[$part][0] . ' ISO/IEC 8859-' . $part . ' ' . $iso8859_comments[$part][1] . ' */';
    $out[] = 'static int u_' . $name . '(const unsigned int u, unsigned char *dest) {';
    $out[] = ' return u_iso8859(u, ' . $name . '_s, ' . $name . '_u, ' . $name . '_sb, ARRAY_SIZE(' . $name . '_u) - 1, dest);';
    $out[] = '}';
}

// Disabled: change the condition to emit the accumulated table size as a comment.
if (0) {
    $out[] = '';
    $out[] = '/* Total ISO/IEC 8859 bytes: ' . $tot_8859 . ' */';
}
198
// C forward declaration of the shared Windows-125x lookup routine that the
// generated per-code-page wrappers call. The nowdoc starts with an empty line,
// so a blank line separates it from the text already in $out.
199 $u_cp125x = <<<'EOD'
200
201 /* Forward reference to base Windows-125x routine - see "eci.c" */
202 static int u_cp125x(const unsigned int u, const unsigned short *tab_s, const unsigned short *tab_u,
203 const unsigned char *tab_sb, int e, unsigned char *dest);
204 EOD;
205
// Append the declaration to the output, one array element per line.
206 $out = array_merge($out, explode("\n", $u_cp125x));
207
// ECI number and description for each Windows-125x code page, indexed by the
// final digit of the code page name. Indices 3, 4 and 5 hold empty placeholders
// so the array stays dense from 0 to 6.
$cp125x_comments = array(
    //    ECI   Description
    0 => array( '21', 'Latin 2 (Central Europe)' ),
    1 => array( '22', 'Cyrillic' ),
    2 => array( '23', 'Latin 1' ),
    3 => array(),
    4 => array(),
    5 => array(),
    6 => array( '24', 'Arabic' ),
);
216
217 // Read the Windows 125x files.
218
// Formats a list of integers as table rows of at most 8 entries. Every row
// begins with a single space and each value is rendered with $format.
// An empty list yields no rows.
$format_rows = function (array $values, $format) {
    $rows = array();
    foreach (array_chunk($values, 8) as $chunk) {
        $row = ' ';
        foreach ($chunk as $value) {
            $row .= sprintf($format, $value);
        }
        $rows[] = $row;
    }
    return $rows;
};

// For each Windows code page CP1250..CP1256 (CP1253-CP1255 are skipped):
// download the unicode.org mapping file, split the mappings into
// "straight-thru" bytes (byte value equals the codepoint) and table-driven
// ones, then append the C tables and the wrapper function to $out.
$tot_cp125x = 0; // Running size in bytes of all emitted Windows-125x tables.
for ($digit = 0; $digit <= 6; $digit++) {
    if ($digit >= 3 && $digit <= 5) {
        continue;
    }

    $file = 'https://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/' . 'CP125' . $digit . '.TXT';

    $get = file_get_contents($file);
    if ($get === false) {
        // Report the failure and stop with a non-zero status so that callers
        // (make, CI, shell scripts) can tell the header was not generated.
        error_log("$basename: ERROR: Could not read mapping file \"$file\"");
        exit(1);
    }

    // Parse the file.

    $sort = array(); // Unicode codepoints that need a table lookup.
    $sb = array();   // Single-byte values, parallel to $sort.
    $same = array(); // Bytes (0xA0-0xFF) whose codepoint equals the byte value.
    foreach (explode("\n", $get) as $line) {
        $line = trim($line);
        // Only lines starting with "0x" are mappings; an empty line fails this test too.
        if (strncmp($line, '0x', 2) !== 0 || strpos($line, "*** NO MAPPING ***") !== false) {
            continue;
        }
        $matches = array();
        if (!preg_match('/^0x([0-9A-F]{2})[ \t]+0x([0-9A-F]{4})[ \t].*$/', $line, $matches)) {
            continue;
        }
        $mb = hexdec($matches[1]);
        $unicode = hexdec($matches[2]);
        if ($unicode < 0x80) {
            continue; // Codepoints below 0x80 get no table entry.
        }
        // The straight-thru flags below only have bits for bytes 0xA0-0xFF.
        // An identity mapping in 0x80-0x9F cannot be flagged (it would index
        // before the first flag word and be lost), so it goes into the sorted
        // lookup tables instead.
        // NOTE(review): assumes u_cp125x in "eci.c" falls back to the lookup
        // tables for codepoints 0x80-0x9F - confirm.
        if ($mb >= 0xA0 && $unicode == $mb) {
            $same[] = $mb;
        } else {
            $sort[] = $unicode;
            $sb[] = $mb;
        }
    }

    // Order by codepoint, keeping each single byte paired with its codepoint.
    array_multisort($sort, $sb);

    // One bit per byte 0xA0..0xFF: six 16-bit words, bit (byte & 0xF) of word ((byte - 0xA0) >> 4).
    $s = array( 0, 0, 0, 0, 0, 0 );
    foreach ($same as $byte) {
        $v = $byte - 0xA0;
        $s[$v >> 4] |= 1 << ($v & 0xF);
    }

    // Output.

    $name = 'cp125' . $digit;

    $out[] = '';
    $out[] = '/* Tables for Windows 125' . $digit . ' */';
    $out[] = 'static const unsigned short ' . $name . '_s[6] = { /* Straight-thru bit-flags */';
    $out = array_merge($out, $format_rows($s, ' 0x%04X,'));
    $out[] = '};';
    $tot_cp125x += 6 * 2;

    $cnt = count($sort);
    $out[] = 'static const unsigned short ' . $name . '_u[' . $cnt . '] = { /* Unicode codepoints sorted */';
    $out = array_merge($out, $format_rows($sort, ' 0x%04X,'));
    $out[] = '};';
    $tot_cp125x += $cnt * 2;

    $cnt = count($sb);
    $out[] = 'static const unsigned char ' . $name . '_sb[' . $cnt . '] = { /* Single-byte in Unicode order */';
    $out = array_merge($out, $format_rows($sb, ' 0x%02X,'));
    $out[] = '};';
    $tot_cp125x += $cnt;

    // Wrapper function binding this code page's tables to the shared routine.
    $out[] = '';
    $out[] = '/* ECI ' . $cp125x_comments[$digit][0] . ' Windows-125' . $digit . ' ' . $cp125x_comments[$digit][1] . ' */';
    $out[] = 'static int u_' . $name . '(const unsigned int u, unsigned char *dest) {';
    $out[] = ' return u_cp125x(u, ' . $name . '_s, ' . $name . '_u, ' . $name . '_sb, ARRAY_SIZE(' . $name . '_u) - 1, dest);';
    $out[] = '}';
}

// Disabled: change the condition to emit the accumulated table sizes as comments.
if (0) {
    $out[] = '';
    $out[] = '/* Total Windows 125x bytes: ' . $tot_cp125x . ' */';

    $out[] = '';
    $out[] = '/* Total bytes: ' . ($tot_8859 + $tot_cp125x) . ' */';
}
326
// Close the include guard.
$out[] = '';
$out[] = '#endif /* Z_ECI_SB_H */';

// Write the collected lines as "eci_sb.h" in the output directory, newline
// terminated. A failed write is reported and ends the script with a non-zero
// status instead of passing silently.
$out_file = $out_dirname . '/eci_sb.h';
if (file_put_contents($out_file, implode("\n", $out) . "\n") === false) {
    error_log("$basename: ERROR: Could not write output file \"$out_file\"");
    exit(1);
}
331
332 /* vim: set ts=4 sw=4 et : */