Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/zint/backend/tools/gen_eci_sb_h.php @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 <?php | |
| 2 /* Generate ECI single-byte tables & routines from unicode.org mapping files */ | |
| 3 /* | |
| 4 libzint - the open source barcode library | |
| 5 Copyright (C) 2022-2023 Robin Stuart <rstuart114@gmail.com> | |
| 6 */ | |
| 7 /* SPDX-License-Identifier: BSD-3-Clause */ | |
| 8 /* | |
| 9 * To create "backend/eci_sb.h" (from project root directory): | |
| 10 * | |
| 11 * php backend/tools/gen_eci_sb_h.php | |
| 12 */ | |
| 13 | |
// Script file name (used as a prefix in error messages) and the directory holding this script.
$basename = basename(__FILE__);
$dirname = __DIR__;

// Command-line options: "-o <dir>" selects the output directory. "-d <arg>" is accepted by the
// option string but its value is not read anywhere in the visible script.
$opts = getopt('d:o:');

// Where to put output: the "-o" argument if given, otherwise the parent of the script's directory.
if (isset($opts['o'])) {
    $out_dirname = $opts['o'];
} else {
    $out_dirname = $dirname . '/..';
}

// Lines of the generated header file, accumulated in output order.
$out = array();
| 21 | |
// Fixed preamble of the generated "eci_sb.h": provenance comment, licence text and the opening of
// the include guard. Kept as a nowdoc so that nothing inside it is interpolated.
$head = <<<'EOD'
/* eci_sb.h - Extended Channel Interpretations single-byte, generated by "backend/tools/gen_eci_sb_h.php"
from "https://unicode.org/Public/MAPPINGS/ISO8859/8859-*.TXT"
and "https://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP125*.TXT" */
/*
libzint - the open source barcode library
Copyright (C) 2021-2022 Robin Stuart <rstuart114@gmail.com>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:

1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. Neither the name of the project nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.
*/
/* SPDX-License-Identifier: BSD-3-Clause */

#ifndef Z_ECI_SB_H
#define Z_ECI_SB_H
EOD;

// The output buffer starts out as the preamble, one array element per line.
$out = explode("\n", $head);
| 62 | |
// C forward declaration of the shared ISO/IEC 8859 lookup routine that every generated
// u_iso8859_<n>() wrapper calls. The leading blank line separates it from what precedes it.
$u_iso8859 = <<<'EOD'

/* Forward reference to base ISO/IEC 8859 routine - see "eci.c" */
static int u_iso8859(const unsigned int u, const unsigned short *tab_s, const unsigned short *tab_u,
const unsigned char *tab_sb, int e, unsigned char *dest);
EOD;

// Append the declaration to the output buffer, one element per line.
foreach (explode("\n", $u_iso8859) as $decl_line) {
    $out[] = $decl_line;
}
| 71 | |
// Comment data for the generated wrappers, indexed by ISO/IEC 8859 part number.
// Each used entry is array(ECI number, description); parts the generator never emits
// (0, 1 and 12) are left as empty placeholders so the index equals the part number.
$iso8859_comments = array(
    0 => array(),
    1 => array(),
    //         ECI   Description
    2 => array( '4', 'Latin alphabet No. 2 (Latin-2)' ),
    3 => array( '5', 'Latin alphabet No. 3 (Latin-3) (South European)' ),
    4 => array( '6', 'Latin alphabet No. 4 (Latin-4) (North European)' ),
    5 => array( '7', 'Latin/Cyrillic' ),
    6 => array( '8', 'Latin/Arabic' ),
    7 => array( '9', 'Latin/Greek' ),
    8 => array( '10', 'Latin/Hebrew' ),
    9 => array( '11', 'Latin alphabet No. 5 (Latin-5) (Latin/Turkish)' ),
    10 => array( '12', 'Latin alphabet No. 6 (Latin-6) (Nordic)' ),
    11 => array( '13', 'Latin/Thai' ),
    12 => array(),
    13 => array( '15', 'Latin alphabet No. 7 (Latin-7) (Baltic Rim)' ),
    14 => array( '16', 'Latin alphabet No. 8 (Latin-8) (Celtic)' ),
    15 => array( '17', 'Latin alphabet No. 9 (Latin-9)' ),
    16 => array( '18', 'Latin alphabet No. 10 (Latin-10) (South-Eastern European)' ),
);
| 91 | |
// Read the ISO/IEC 8859 mapping files (parts 2 to 16, skipping 12) from unicode.org and, for each
// part, append three lookup tables and a wrapper function around u_iso8859() to $out.

$tot_8859 = 0; // Running size in bytes of the emitted tables; only reported by the disabled block below.
for ($k = 2; $k <= 16; $k++) {
    if ($k == 12) continue;

    $file = 'https://unicode.org/Public/MAPPINGS/ISO8859/' . '8859-' . $k . '.TXT';

    if (($get = file_get_contents($file)) === false) {
        error_log($error = "$basename: ERROR: Could not read mapping file \"$file\"");
        // exit() given a string prints it but ends the process with status 0, which hides the
        // failure from callers. Print the message explicitly and exit non-zero instead.
        echo $error . PHP_EOL;
        exit(1);
    }

    $lines = explode("\n", $get);

    // Parse the file.

    $sort = array(); // Unicode codepoints that need a table lookup.
    $sb = array(); // Single-byte values, kept parallel to $sort.
    $same = array(); // Bytes in 0xA0-0xFF whose codepoint equals the byte value.
    foreach ($lines as $line) {
        $line = trim($line);
        // Only lines starting with "0x" that are not flagged as unmapped carry data.
        if ($line === '' || strncmp($line, '0x', 2) !== 0 || strpos($line, "*** NO MAPPING ***") !== false) {
            continue;
        }
        $matches = array();
        if (preg_match('/^0x([0-9A-F]{2})[ \t]+0x([0-9A-F]{4})[ \t].*$/', $line, $matches)) {
            $mb = hexdec($matches[1]);
            $unicode = hexdec($matches[2]);
            // Codepoints below 0xA0 get no table entry at all.
            if ($unicode >= 0xA0) {
                if ($unicode <= 0xFF && $unicode == $mb) {
                    $same[] = $mb;
                } else {
                    $sort[] = $unicode;
                    $sb[] = $mb;
                }
            }
        }
    }

    sort($same);
    // Order by codepoint, carrying the single-byte values along so the arrays stay parallel.
    array_multisort($sort, $sb);

    // Pack $same into six 16-bit words: bit (v & 0xF) of word (v >> 4), where v = byte - 0xA0.
    $s = array( 0, 0, 0, 0, 0, 0 );
    foreach ($same as $byte) {
        $v = $byte - 0xA0;
        $s[$v >> 4] |= 1 << ($v & 0xF);
    }

    // Output.

    $out[] = '';
    $out[] = '/* Tables for ISO/IEC 8859-' . $k . ' */';
    $out[] = 'static const unsigned short iso8859_' . $k . '_s[6] = { /* Straight-thru bit-flags */';
    $line = ' ';
    foreach ($s as $word) {
        $line .= sprintf(" 0x%04X,", $word);
    }
    $out[] = $line;
    $out[] = '};';
    $tot_8859 += 6 * 2;

    // Sorted codepoints, eight values per output line.
    $cnt = count($sort);
    $out[] = 'static const unsigned short iso8859_' . $k . '_u[' . $cnt . '] = { /* Unicode codepoints sorted */';
    foreach (array_chunk($sort, 8) as $row) {
        $line = ' ';
        foreach ($row as $codepoint) {
            $line .= sprintf(' 0x%04X,', $codepoint);
        }
        $out[] = $line;
    }
    $out[] = '};';
    $tot_8859 += $cnt * 2;

    // Matching single-byte values, eight values per output line.
    $cnt = count($sb);
    $out[] = 'static const unsigned char iso8859_' . $k . '_sb[' . $cnt . '] = { /* Single-byte in Unicode order */';
    foreach (array_chunk($sb, 8) as $row) {
        $line = ' ';
        foreach ($row as $byte) {
            $line .= sprintf(' 0x%02X,', $byte);
        }
        $out[] = $line;
    }
    $out[] = '};';
    $tot_8859 += $cnt;

    // Wrapper that hands this part's tables to u_iso8859(); the fifth argument is the last
    // valid index of the sorted table.
    $out[] = '';
    $out[] = '/* ECI ' . $iso8859_comments[$k][0] . ' ISO/IEC 8859-' . $k . ' ' . $iso8859_comments[$k][1] . ' */';
    $out[] = 'static int u_iso8859_' . $k . '(const unsigned int u, unsigned char *dest) {';
    $out[] = ' return u_iso8859(u, iso8859_' . $k . '_s, iso8859_' . $k . '_u, iso8859_' . $k . '_sb, ARRAY_SIZE(iso8859_' . $k . '_u) - 1, dest);';
    $out[] = '}';
}

// Disabled: flip the condition to emit the total table size as a comment.
if (0) {
    $out[] = '';
    $out[] = '/* Total ISO/IEC 8859 bytes: ' . $tot_8859 . ' */';
}
| 198 | |
// C forward declaration of the shared Windows-125x lookup routine that every generated
// u_cp125<n>() wrapper calls. The leading blank line separates it from what precedes it.
$u_cp125x = <<<'EOD'

/* Forward reference to base Windows-125x routine - see "eci.c" */
static int u_cp125x(const unsigned int u, const unsigned short *tab_s, const unsigned short *tab_u,
const unsigned char *tab_sb, int e, unsigned char *dest);
EOD;

// Append the declaration to the output buffer, one element per line.
foreach (explode("\n", $u_cp125x) as $decl_line) {
    $out[] = $decl_line;
}
| 207 | |
// Comment data for the generated wrappers, indexed by the last digit of the Windows-125x
// code page. Each used entry is array(ECI number, description); digits the generator
// never emits (3 to 5) are left as empty placeholders so the index equals the digit.
$cp125x_comments = array(
    //         ECI    Description
    0 => array( '21', 'Latin 2 (Central Europe)' ),
    1 => array( '22', 'Cyrillic' ),
    2 => array( '23', 'Latin 1' ),
    3 => array(),
    4 => array(),
    5 => array(),
    6 => array( '24', 'Arabic' ),
);
| 216 | |
// Read the Windows 125x mapping files (CP1250, CP1251, CP1252 and CP1256) from unicode.org and,
// for each, append three lookup tables and a wrapper function around u_cp125x() to $out.

$tot_cp125x = 0; // Running size in bytes of the emitted tables; only reported by the disabled block below.
for ($k = 0; $k <= 6; $k++) {
    if ($k >= 3 && $k <= 5) continue;

    $file = 'https://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/' . 'CP125' . $k . '.TXT';

    if (($get = file_get_contents($file)) === false) {
        error_log($error = "$basename: ERROR: Could not read mapping file \"$file\"");
        // exit() given a string prints it but ends the process with status 0, which hides the
        // failure from callers. Print the message explicitly and exit non-zero instead.
        echo $error . PHP_EOL;
        exit(1);
    }

    $lines = explode("\n", $get);

    // Parse the file.

    $sort = array(); // Unicode codepoints that need a table lookup.
    $sb = array(); // Single-byte values, kept parallel to $sort.
    $same = array(); // Bytes in 0xA0-0xFF whose codepoint equals the byte value.
    foreach ($lines as $line) {
        $line = trim($line);
        // Only lines starting with "0x" that are not flagged as unmapped carry data.
        if ($line === '' || strncmp($line, '0x', 2) !== 0 || strpos($line, "*** NO MAPPING ***") !== false) {
            continue;
        }
        $matches = array();
        if (preg_match('/^0x([0-9A-F]{2})[ \t]+0x([0-9A-F]{4})[ \t].*$/', $line, $matches)) {
            $mb = hexdec($matches[1]);
            $unicode = hexdec($matches[2]);
            // Codepoints below 0x80 get no table entry at all.
            if ($unicode >= 0x80) {
                // The bit-flag table below holds 6 words of 16 bits indexed from 0xA0, so it can
                // only describe 0xA0-0xFF. An identity mapping in 0x80-0x9F would yield a negative
                // index and be silently lost, so such entries go to the sorted tables instead.
                // NOTE(review): confirm u_cp125x() in "eci.c" searches the sorted tables for 0x80-0x9F.
                if ($unicode >= 0xA0 && $unicode <= 0xFF && $unicode == $mb) {
                    $same[] = $mb;
                } else {
                    $sort[] = $unicode;
                    $sb[] = $mb;
                }
            }
        }
    }

    sort($same);
    // Order by codepoint, carrying the single-byte values along so the arrays stay parallel.
    array_multisort($sort, $sb);

    // Pack $same into six 16-bit words: bit (v & 0xF) of word (v >> 4), where v = byte - 0xA0.
    $s = array( 0, 0, 0, 0, 0, 0 );
    foreach ($same as $byte) {
        $v = $byte - 0xA0;
        $s[$v >> 4] |= 1 << ($v & 0xF);
    }

    // Output.

    $out[] = '';
    $out[] = '/* Tables for Windows 125' . $k . ' */';
    $out[] = 'static const unsigned short cp125' . $k . '_s[6] = { /* Straight-thru bit-flags */';
    $line = ' ';
    foreach ($s as $word) {
        $line .= sprintf(" 0x%04X,", $word);
    }
    $out[] = $line;
    $out[] = '};';
    $tot_cp125x += 6 * 2;

    // Sorted codepoints, eight values per output line.
    $cnt = count($sort);
    $out[] = 'static const unsigned short cp125' . $k . '_u[' . $cnt . '] = { /* Unicode codepoints sorted */';
    foreach (array_chunk($sort, 8) as $row) {
        $line = ' ';
        foreach ($row as $codepoint) {
            $line .= sprintf(' 0x%04X,', $codepoint);
        }
        $out[] = $line;
    }
    $out[] = '};';
    $tot_cp125x += $cnt * 2;

    // Matching single-byte values, eight values per output line.
    $cnt = count($sb);
    $out[] = 'static const unsigned char cp125' . $k . '_sb[' . $cnt . '] = { /* Single-byte in Unicode order */';
    foreach (array_chunk($sb, 8) as $row) {
        $line = ' ';
        foreach ($row as $byte) {
            $line .= sprintf(' 0x%02X,', $byte);
        }
        $out[] = $line;
    }
    $out[] = '};';
    $tot_cp125x += $cnt;

    // Wrapper that hands this code page's tables to u_cp125x(); the fifth argument is the last
    // valid index of the sorted table.
    $out[] = '';
    $out[] = '/* ECI ' . $cp125x_comments[$k][0] . ' Windows-125' . $k . ' ' . $cp125x_comments[$k][1] . ' */';
    $out[] = 'static int u_cp125' . $k . '(const unsigned int u, unsigned char *dest) {';
    $out[] = ' return u_cp125x(u, cp125' . $k . '_s, cp125' . $k . '_u, cp125' . $k . '_sb, ARRAY_SIZE(cp125' . $k . '_u) - 1, dest);';
    $out[] = '}';
}

// Disabled: flip the condition to emit the table size totals as comments.
if (0) {
    $out[] = '';
    $out[] = '/* Total Windows 125x bytes: ' . $tot_cp125x . ' */';

    $out[] = '';
    $out[] = '/* Total bytes: ' . ($tot_8859 + $tot_cp125x) . ' */';
}
| 326 | |
// Close the include guard and write the accumulated lines to "<output dir>/eci_sb.h".
$out[] = '';
$out[] = '#endif /* Z_ECI_SB_H */';

$out_file = $out_dirname . '/eci_sb.h';
if (file_put_contents($out_file, implode("\n", $out) . "\n") === false) {
    // A failed write (missing directory, no permission, ...) must not look like success.
    error_log($error = "$basename: ERROR: Could not write output file \"$out_file\"");
    echo $error . PHP_EOL;
    exit(1);
}
| 331 | |
| 332 /* vim: set ts=4 sw=4 et : */ |
