diff mupdf-source/thirdparty/zint/backend/tools/gen_eci_sb_h.php @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mupdf-source/thirdparty/zint/backend/tools/gen_eci_sb_h.php	Mon Sep 15 11:43:07 2025 +0200
@@ -0,0 +1,332 @@
+<?php
+/* Generate ECI single-byte tables & routines from unicode.org mapping files */
+/*
+    libzint - the open source barcode library
+    Copyright (C) 2022-2023 Robin Stuart <rstuart114@gmail.com>
+*/
+/* SPDX-License-Identifier: BSD-3-Clause */
+/*
+ * To create "backend/eci_sb.h" (from project root directory):
+ *
+ *   php backend/tools/gen_eci_sb_h.php
+ */
+
+$basename = basename(__FILE__);
+$dirname = dirname(__FILE__);
+
+$opts = getopt('d:o:');
+$out_dirname = isset($opts['o']) ? $opts['o'] : ($dirname . '/..'); // Where to put output.
+
+$out = array();
+
+$head = <<<'EOD'
+/*  eci_sb.h - Extended Channel Interpretations single-byte, generated by "backend/tools/gen_eci_sb_h.php"
+    from "https://unicode.org/Public/MAPPINGS/ISO8859/8859-*.TXT"
+    and "https://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP125*.TXT" */
+/*
+    libzint - the open source barcode library
+    Copyright (C) 2021-2022 Robin Stuart <rstuart114@gmail.com>
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions
+    are met:
+
+    1. Redistributions of source code must retain the above copyright
+       notice, this list of conditions and the following disclaimer.
+    2. Redistributions in binary form must reproduce the above copyright
+       notice, this list of conditions and the following disclaimer in the
+       documentation and/or other materials provided with the distribution.
+    3. Neither the name of the project nor the names of its contributors
+       may be used to endorse or promote products derived from this software
+       without specific prior written permission.
+
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+    ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+    ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+    FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+    DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+    OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+    HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+    OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+    SUCH DAMAGE.
+ */
+/* SPDX-License-Identifier: BSD-3-Clause */
+
+#ifndef Z_ECI_SB_H
+#define Z_ECI_SB_H
+EOD;
+
+$out = explode("\n", $head);
+
+$u_iso8859 = <<<'EOD'
+
+/* Forward reference to base ISO/IEC 8859 routine - see "eci.c" */
+static int u_iso8859(const unsigned int u, const unsigned short *tab_s, const unsigned short *tab_u,
+            const unsigned char *tab_sb, int e, unsigned char *dest);
+EOD;
+
+$out = array_merge($out, explode("\n", $u_iso8859));
+
+$iso8859_comments = array(
+    array(), array(), // 0-1
+    //    ECI    Description
+    array( '4', 'Latin alphabet No. 2 (Latin-2)' ),
+    array( '5', 'Latin alphabet No. 3 (Latin-3) (South European)' ),
+    array( '6', 'Latin alphabet No. 4 (Latin-4) (North European)' ),
+    array( '7', 'Latin/Cyrillic' ),
+    array( '8', 'Latin/Arabic' ),
+    array( '9', 'Latin/Greek' ),
+    array( '10', 'Latin/Hebrew' ),
+    array( '11', 'Latin alphabet No. 5 (Latin-5) (Latin/Turkish)' ),
+    array( '12', 'Latin alphabet No. 6 (Latin-6) (Nordic)' ),
+    array( '13', 'Latin/Thai' ),
+    array(),
+    array( '15', 'Latin alphabet No. 7 (Latin-7) (Baltic Rim)' ),
+    array( '16', 'Latin alphabet No. 8 (Latin-8) (Celtic)' ),
+    array( '17', 'Latin alphabet No. 9 (Latin-9)' ),
+    array( '18', 'Latin alphabet No. 10 (Latin-10) (South-Eastern European)' ),
+);
+
+// Read the 8859 files.
+
+$tot_8859 = 0;
+for ($k = 2; $k <= 16; $k++) {
+    if ($k == 12) continue;
+
+    $file = 'https://unicode.org/Public/MAPPINGS/ISO8859/' . '8859-' . $k . '.TXT';
+
+    if (($get = file_get_contents($file)) === false) {
+        error_log($error = "$basename: ERROR: Could not read mapping file \"$file\"");
+        exit($error . PHP_EOL);
+    }
+
+    $lines = explode("\n", $get);
+
+    // Parse the file.
+
+    $sort = array();
+    $sb = array();
+    $same = array();
+    foreach ($lines as $line) {
+        $line = trim($line);
+        if ($line === '' || strncmp($line, '0x', 2) !== 0 || strpos($line, "*** NO MAPPING ***") !== false) {
+            continue;
+        }
+        $matches = array();
+        if (preg_match('/^0x([0-9A-F]{2})[ \t]+0x([0-9A-F]{4})[ \t].*$/', $line, $matches)) {
+            $mb = hexdec($matches[1]);
+            $unicode = hexdec($matches[2]);
+            if ($unicode >= 0xA0) {
+                if ($unicode <= 0xFF && $unicode == $mb) {
+                    $same[] = $mb;
+                } else {
+                    $sort[] = $unicode;
+                    $sb[] = $mb;
+                }
+            }
+        }
+    }
+
+    sort($same);
+    array_multisort($sort, $sb);
+
+    $s = array( 0, 0, 0, 0, 0, 0 );
+    for ($i = 0, $cnt = count($same); $i < $cnt; $i++) {
+        $v = $same[$i] - 0xA0;
+        $j = $v >> 4;
+        $s[$j] |= 1 << ($v & 0xF);
+    }
+
+    // Output.
+
+    $out[] = '';
+    $out[] = '/* Tables for ISO/IEC 8859-' . $k . ' */';
+    $out[] = 'static const unsigned short iso8859_' . $k . '_s[6] = { /* Straight-thru bit-flags */';
+    $line = '   ';
+    for ($i = 0; $i < 6; $i++) {
+        $line .= sprintf(" 0x%04X,", $s[$i]);
+    }
+    $out[] = $line;
+    $out[] = '};';
+    $tot_8859 += 6 * 2;
+
+    $cnt = count($sort);
+    $out[] = 'static const unsigned short iso8859_' . $k . '_u[' . $cnt . '] = { /* Unicode codepoints sorted */';
+    $line = '   ';
+    for ($i = 0; $i < $cnt; $i++) {
+        if ($i && $i % 8 === 0) {
+            $out[] = $line;
+            $line = '   ';
+        }
+        $line .= sprintf(' 0x%04X,', $sort[$i]);
+    }
+    if ($line !== '   ') {
+        $out[] = $line;
+    }
+    $out[] = '};';
+    $tot_8859 += $cnt * 2;
+
+    $cnt = count($sb);
+    $out[] = 'static const unsigned char iso8859_' . $k . '_sb[' . $cnt . '] = { /* Single-byte in Unicode order */';
+    $line = ' ';
+    for ($i = 0; $i < $cnt; $i++) {
+        if ($i && $i % 8 === 0) {
+            $out[] = $line;
+            $line = ' ';
+        }
+        $line .= sprintf('   0x%02X,', $sb[$i]);
+    }
+    if ($line !== '   ') {
+        $out[] = $line;
+    }
+    $out[] = '};';
+    $tot_8859 += $cnt;
+
+    $out[] = '';
+    $out[] = '/* ECI ' . $iso8859_comments[$k][0] . ' ISO/IEC 8859-' . $k . ' ' . $iso8859_comments[$k][1] . ' */';
+    $out[] = 'static int u_iso8859_' . $k . '(const unsigned int u, unsigned char *dest) {';
+    $out[] = '    return u_iso8859(u, iso8859_' . $k . '_s, iso8859_' . $k . '_u, iso8859_' . $k . '_sb, ARRAY_SIZE(iso8859_' . $k . '_u) - 1, dest);';
+    $out[] = '}';
+}
+
+if (0) {
+    $out[] = '';
+    $out[] = '/* Total ISO/IEC 8859 bytes: ' . $tot_8859 . ' */';
+}
+
+$u_cp125x = <<<'EOD'
+
+/* Forward reference to base Windows-125x routine - see "eci.c" */
+static int u_cp125x(const unsigned int u, const unsigned short *tab_s, const unsigned short *tab_u,
+            const unsigned char *tab_sb, int e, unsigned char *dest);
+EOD;
+
+$out = array_merge($out, explode("\n", $u_cp125x));
+
+$cp125x_comments = array(
+    //    ECI    Description
+    array( '21', 'Latin 2 (Central Europe)' ),
+    array( '22', 'Cyrillic' ),
+    array( '23', 'Latin 1' ),
+    array(), array(), array(),
+    array( '24', 'Arabic' ),
+);
+
+// Read the Windows 125x files.
+
+$tot_cp125x = 0;
+for ($k = 0; $k <= 6; $k++) {
+    if ($k >= 3 && $k <= 5) continue;
+
+    $file = 'https://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/' . 'CP125' . $k . '.TXT';
+
+    if (($get = file_get_contents($file)) === false) {
+        error_log($error = "$basename: ERROR: Could not read mapping file \"$file\"");
+        exit($error . PHP_EOL);
+    }
+
+    $lines = explode("\n", $get);
+
+    // Parse the file.
+
+    $sort = array();
+    $sb = array();
+    $same = array();
+    foreach ($lines as $line) {
+        $line = trim($line);
+        if ($line === '' || strncmp($line, '0x', 2) !== 0 || strpos($line, "*** NO MAPPING ***") !== false) {
+            continue;
+        }
+        $matches = array();
+        if (preg_match('/^0x([0-9A-F]{2})[ \t]+0x([0-9A-F]{4})[ \t].*$/', $line, $matches)) {
+            $mb = hexdec($matches[1]);
+            $unicode = hexdec($matches[2]);
+            if ($unicode >= 0x80) {
+                if ($unicode <= 0xFF && $unicode == $mb) {
+                    $same[] = $mb;
+                } else {
+                    $sort[] = $unicode;
+                    $sb[] = $mb;
+                }
+            }
+        }
+    }
+
+    sort($same);
+    array_multisort($sort, $sb);
+
+    $s = array( 0, 0, 0, 0, 0, 0 );
+    for ($i = 0, $cnt = count($same); $i < $cnt; $i++) {
+        $v = $same[$i] - 0xA0;
+        $j = $v >> 4;
+        $s[$j] |= 1 << ($v & 0xF);
+    }
+
+    // Output.
+
+    $out[] = '';
+    $out[] = '/* Tables for Windows 125' . $k . ' */';
+    $out[] = 'static const unsigned short cp125' . $k . '_s[6] = { /* Straight-thru bit-flags */';
+    $line = '   ';
+    for ($i = 0; $i < 6; $i++) {
+        $line .= sprintf(" 0x%04X,", $s[$i]);
+    }
+    $out[] = $line;
+    $out[] = '};';
+    $tot_cp125x += 6 * 2;
+
+    $cnt = count($sort);
+    $out[] = 'static const unsigned short cp125' . $k . '_u[' . $cnt . '] = { /* Unicode codepoints sorted */';
+    $line = '   ';
+    for ($i = 0; $i < $cnt; $i++) {
+        if ($i && $i % 8 === 0) {
+            $out[] = $line;
+            $line = '   ';
+        }
+        $line .= sprintf(' 0x%04X,', $sort[$i]);
+    }
+    if ($line !== '   ') {
+        $out[] = $line;
+    }
+    $out[] = '};';
+    $tot_cp125x += $cnt * 2;
+
+    $cnt = count($sb);
+    $out[] = 'static const unsigned char cp125' . $k . '_sb[' . $cnt . '] = { /* Single-byte in Unicode order */';
+    $line = ' ';
+    for ($i = 0; $i < $cnt; $i++) {
+        if ($i && $i % 8 === 0) {
+            $out[] = $line;
+            $line = ' ';
+        }
+        $line .= sprintf('   0x%02X,', $sb[$i]);
+    }
+    if ($line !== '   ') {
+        $out[] = $line;
+    }
+    $out[] = '};';
+    $tot_cp125x += $cnt;
+
+    $out[] = '';
+    $out[] = '/* ECI ' . $cp125x_comments[$k][0] . ' Windows-125' . $k . ' ' . $cp125x_comments[$k][1] . ' */';
+    $out[] = 'static int u_cp125' . $k . '(const unsigned int u, unsigned char *dest) {';
+    $out[] = '    return u_cp125x(u, cp125' . $k . '_s, cp125' . $k . '_u, cp125' . $k . '_sb, ARRAY_SIZE(cp125' . $k . '_u) - 1, dest);';
+    $out[] = '}';
+}
+
+if (0) {
+    $out[] = '';
+    $out[] = '/* Total Windows 125x bytes: ' . $tot_cp125x . ' */';
+
+    $out[] = '';
+    $out[] = '/* Total bytes: ' . ($tot_8859 + $tot_cp125x) . ' */';
+}
+
+$out[] = '';
+$out[] = '#endif /* Z_ECI_SB_H */';
+
+file_put_contents($out_dirname . '/eci_sb.h', implode("\n", $out) . "\n");
+
+/* vim: set ts=4 sw=4 et : */