Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/zint/backend/tests/test_gb2312.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /* | |
| 2 libzint - the open source barcode library | |
| 3 Copyright (C) 2019-2023 Robin Stuart <rstuart114@gmail.com> | |
| 4 | |
| 5 Redistribution and use in source and binary forms, with or without | |
| 6 modification, are permitted provided that the following conditions | |
| 7 are met: | |
| 8 | |
| 9 1. Redistributions of source code must retain the above copyright | |
| 10 notice, this list of conditions and the following disclaimer. | |
| 11 2. Redistributions in binary form must reproduce the above copyright | |
| 12 notice, this list of conditions and the following disclaimer in the | |
| 13 documentation and/or other materials provided with the distribution. | |
| 14 3. Neither the name of the project nor the names of its contributors | |
| 15 may be used to endorse or promote products derived from this software | |
| 16 without specific prior written permission. | |
| 17 | |
| 18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | |
| 19 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
| 20 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
| 21 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE | |
| 22 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
| 23 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
| 24 OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
| 25 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
| 26 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
| 27 OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
| 28 SUCH DAMAGE. | |
| 29 */ | |
| 30 /* SPDX-License-Identifier: BSD-3-Clause */ | |
| 31 | |
| 32 #include "testcommon.h" | |
| 33 #include "test_gb2312_tab.h" | |
| 34 #include "../eci.h" | |
| 35 /* For local "private" testing using previous libiconv adaptation, not included for licensing reasons */ | |
| 36 #if 0 | |
| 37 #define TEST_JUST_SAY_GNO | |
| 38 #endif | |
| 39 #ifdef TEST_JUST_SAY_GNO | |
| 40 #include "../just_say_gno/gb2312_gnu.c" | |
| 41 #endif | |
| 42 | |
| 43 INTERNAL int u_gb2312_int_test(const unsigned int u, unsigned int *d); | |
| 44 | |
| 45 /* As control convert to GB 2312 using simple table generated from unicode.org GB2312.TXT plus simple processing */ | |
| 46 /* GB2312.TXT no longer on unicode.org site but available from https://haible.de/bruno/charsets/conversion-tables/GB2312.html */ | |
| 47 static int u_gb2312_int2(unsigned int u, unsigned int *d) { | |
| 48 int tab_length, start_i, end_i; | |
| 49 int i; | |
| 50 | |
| 51 if (u < 0x80) { | |
| 52 *d = (unsigned char) u; | |
| 53 return 1; | |
| 54 } | |
| 55 /* Shortcut */ | |
| 56 if ((u > 0x0451 && u < 0x2015) || (u > 0x3229 && u < 0x4E00) || (u > 0x9FA0 && u < 0xFF01) || u > 0xFFE5) { | |
| 57 return 0; | |
| 58 } | |
| 59 tab_length = ARRAY_SIZE(test_gb2312_tab); | |
| 60 start_i = test_gb2312_tab_ind[u >> 10]; | |
| 61 end_i = start_i + 0x800 > tab_length ? tab_length : start_i + 0x800; | |
| 62 for (i = start_i; i < end_i; i += 2) { | |
| 63 if (test_gb2312_tab[i + 1] == u) { | |
| 64 *d = test_gb2312_tab[i] + 0x8080; /* Table in GB 2312 not EUC-CN */ | |
| 65 return 2; | |
| 66 } | |
| 67 } | |
| 68 return 0; | |
| 69 } | |
| 70 | |
| 71 #include <time.h> | |
| 72 | |
/* Converts a `clock()` tick count `arg` to milliseconds (result is a double) */
#define TEST_PERF_TIME(arg) (((arg) * 1000.0) / CLOCKS_PER_SEC)
/* Ratio of tick counts `a1` to `a2`, or 0 if `a2` is zero (guards against division by zero).
   `a2` is parenthesized in the condition so that an expression argument such as `x ? y : z` is tested
   as a whole rather than being absorbed into the macro's own conditional */
#define TEST_PERF_RATIO(a1, a2) ((a2) ? TEST_PERF_TIME(a1) / TEST_PERF_TIME(a2) : 0)
| 75 | |
| 76 #ifdef TEST_JUST_SAY_GNO | |
| 77 #define TEST_INT_PERF_ITERATIONS 250 | |
| 78 #endif | |
| 79 | |
| 80 static void test_u_gb2312_int(const testCtx *const p_ctx) { | |
| 81 int debug = p_ctx->debug; | |
| 82 | |
| 83 int ret, ret2; | |
| 84 unsigned int val, val2; | |
| 85 unsigned int i; | |
| 86 | |
| 87 #ifdef TEST_JUST_SAY_GNO | |
| 88 int j; | |
| 89 clock_t start; | |
| 90 clock_t total = 0, total_gno = 0; | |
| 91 #else | |
| 92 (void)debug; | |
| 93 #endif | |
| 94 | |
| 95 testStart("test_u_gb2312_int"); | |
| 96 | |
| 97 #ifdef TEST_JUST_SAY_GNO | |
| 98 if ((debug & ZINT_DEBUG_TEST_PERFORMANCE)) { /* -d 256 */ | |
| 99 printf("test_u_gb2312_int perf iterations: %d\n", TEST_INT_PERF_ITERATIONS); | |
| 100 } | |
| 101 #endif | |
| 102 | |
| 103 for (i = 0; i < 0xFFFE; i++) { | |
| 104 if (i >= 0xD800 && i <= 0xDFFF) { /* UTF-16 surrogates */ | |
| 105 continue; | |
| 106 } | |
| 107 if (testContinue(p_ctx, i)) continue; | |
| 108 val = val2 = 0; | |
| 109 ret = u_gb2312_int_test(i, &val); | |
| 110 ret2 = u_gb2312_int2(i, &val2); | |
| 111 assert_equal(ret, ret2, "i:%d 0x%04X ret %d != ret2 %d, val 0x%04X, val2 0x%04X\n", (int) i, i, ret, ret2, val, val2); | |
| 112 if (ret2) { | |
| 113 assert_equal(val, val2, "i:%d 0x%04X val 0x%04X != val2 0x%04X\n", (int) i, i, val, val2); | |
| 114 } | |
| 115 #ifdef TEST_JUST_SAY_GNO | |
| 116 /* `gb2312_wctomb_zint()` doesn't handle ASCII; and ignore duplicate mappings, no longer done */ | |
| 117 if (i >= 0x80 && i != 0xB7 && i != 0x2014) { | |
| 118 if (!(debug & ZINT_DEBUG_TEST_PERFORMANCE)) { /* -d 256 */ | |
| 119 val2 = 0; | |
| 120 ret2 = gb2312_wctomb_zint(&val2, i); | |
| 121 } else { | |
| 122 for (j = 0; j < TEST_INT_PERF_ITERATIONS; j++) { | |
| 123 val = val2 = 0; | |
| 124 | |
| 125 start = clock(); | |
| 126 ret = u_gb2312_int_test(i, &val); | |
| 127 total += clock() - start; | |
| 128 | |
| 129 start = clock(); | |
| 130 ret2 = gb2312_wctomb_zint(&val2, i); | |
| 131 total_gno += clock() - start; | |
| 132 } | |
| 133 } | |
| 134 | |
| 135 assert_equal(ret, ret2, "i:%d 0x%04X ret %d != ret2 %d, val 0x%04X, val2 0x%04X\n", (int) i, i, ret, ret2, val, val2); | |
| 136 if (ret2) { | |
| 137 assert_equal(val, val2, "i:%d 0x%04X val 0x%04X != val2 0x%04X\n", (int) i, i, val, val2); | |
| 138 } | |
| 139 } | |
| 140 #endif | |
| 141 } | |
| 142 | |
| 143 #ifdef TEST_JUST_SAY_GNO | |
| 144 if ((debug & ZINT_DEBUG_TEST_PERFORMANCE)) { /* -d 256 */ | |
| 145 printf("test_u_gb2312_int perf totals: new % 8gms, gno % 8gms ratio %g\n", | |
| 146 TEST_PERF_TIME(total), TEST_PERF_TIME(total_gno), TEST_PERF_RATIO(total, total_gno)); | |
| 147 } | |
| 148 #endif | |
| 149 | |
| 150 testFinish(); | |
| 151 } | |
| 152 | |
| 153 static void test_gb2312_utf8(const testCtx *const p_ctx) { | |
| 154 | |
| 155 struct item { | |
| 156 char *data; | |
| 157 int length; | |
| 158 int ret; | |
| 159 int ret_length; | |
| 160 unsigned int expected_gbdata[20]; | |
| 161 char *comment; | |
| 162 }; | |
| 163 /* | |
| 164 é U+00E9 in ISO 8859-1 plus other ISO 8859 (but not in ISO 8859-7 or ISO 8859-11), Win 1250 plus other Win, in GB 2312 0xA8A6, UTF-8 C3A9 | |
| 165 β U+03B2 in ISO 8859-7 Greek (but not other ISO 8859 or Win page), in GB 2312 0xA6C2, UTF-8 CEB2 | |
| 166 ¤ U+00A4 in ISO 8859-1 plus other ISO 8859 (but not in ISO 8859-7 or ISO 8859-11), Win 1250 plus other Win, in GB 2312 0xA1E8, UTF-8 C2A4 | |
| 167 ¥ U+00A5 in ISO 8859-1 0xA5, not in GB 2312, UTF-8 C2A5 | |
| 168 ・ U+30FB katakana middle dot, not in any ISO or Win page, in GB 2312 "GB2312.TXT" 0xA1A4, duplicate of mapping of U+00B7, UTF-8 E383BB | |
| 169 · U+00B7 middle dot in ISO 8859-1 0xB7, in GB 2312 "GB 18030 subset" 0xA1A4, duplicate of mapping of U+30FB, UTF-8 C2B7 | |
| 170 ― U+2015 horizontal bar in ISO 8859-7 Greek and ISO 8859-10 Nordic, not in any Win page, in GB 2312 "GB2312.TXT" 0xA1AA, duplicate of mapping of U+2014, UTF-8 E28095 | |
| 171 — U+2014 em dash, not in any ISO, in Win 1250 and other Win, in GB 2312 "GB 18030 subset" 0xA1AA, duplicate of mapping of U+2015, UTF-8 E28094 | |
| 172 */ | |
| 173 /* s/\/\*[ 0-9]*\*\//\=printf("\/\*%3d*\/", line(".") - line("'<")): */ | |
| 174 struct item data[] = { | |
| 175 /* 0*/ { "é", -1, 0, 1, { 0xA8A6 }, "" }, | |
| 176 /* 1*/ { "β", -1, 0, 1, { 0xA6C2 }, "" }, | |
| 177 /* 2*/ { "¤", -1, 0, 1, { 0xA1E8 }, "" }, | |
| 178 /* 3*/ { "¥", -1, ZINT_ERROR_INVALID_DATA, -1, {0}, "" }, | |
| 179 /* 4*/ { "・", -1, 0, 1, { 0xA1A4 }, "GB2312.TXT mapping" }, | |
| 180 /* 5*/ { "·", -1, ZINT_ERROR_INVALID_DATA, -1, {0}, "No longer does GB 18030 subset mapping" }, | |
| 181 /* 6*/ { "―", -1, 0, 1, { 0xA1AA }, "GB2312.TXT mapping" }, | |
| 182 /* 7*/ { "—", -1, ZINT_ERROR_INVALID_DATA, -1, {0}, "No longer does GB 18030 subset mapping" }, | |
| 183 /* 8*/ { "aβc・―é―Z", -1, 0, 8, { 'a', 0xA6C2, 'c', 0xA1A4, 0xA1AA, 0xA8A6, 0xA1AA, 'Z' }, "" }, | |
| 184 /* 9*/ { "aβc・·—é—Z", -1, ZINT_ERROR_INVALID_DATA, -1, {0}, "No longer does GB 18030 mappings" }, | |
| 185 /* 10*/ { "\200", -1, ZINT_ERROR_INVALID_DATA, -1, {0}, "Invalid UTF-8" }, | |
| 186 }; | |
| 187 int data_size = ARRAY_SIZE(data); | |
| 188 int i, length, ret; | |
| 189 | |
| 190 struct zint_symbol symbol = {0}; | |
| 191 unsigned int gbdata[20]; | |
| 192 | |
| 193 testStart("test_gb2312_utf8"); | |
| 194 | |
| 195 for (i = 0; i < data_size; i++) { | |
| 196 int ret_length; | |
| 197 | |
| 198 if (testContinue(p_ctx, i)) continue; | |
| 199 | |
| 200 length = data[i].length == -1 ? (int) strlen(data[i].data) : data[i].length; | |
| 201 ret_length = length; | |
| 202 | |
| 203 ret = gb2312_utf8(&symbol, (unsigned char *) data[i].data, &ret_length, gbdata); | |
| 204 assert_equal(ret, data[i].ret, "i:%d ret %d != %d (%s)\n", i, ret, data[i].ret, symbol.errtxt); | |
| 205 if (ret == 0) { | |
| 206 int j; | |
| 207 assert_equal(ret_length, data[i].ret_length, "i:%d ret_length %d != %d\n", i, ret_length, data[i].ret_length); | |
| 208 for (j = 0; j < (int) ret_length; j++) { | |
| 209 assert_equal(gbdata[j], data[i].expected_gbdata[j], "i:%d gbdata[%d] %04X != %04X\n", i, j, gbdata[j], data[i].expected_gbdata[j]); | |
| 210 } | |
| 211 } | |
| 212 } | |
| 213 | |
| 214 testFinish(); | |
| 215 } | |
| 216 | |
| 217 static void test_gb2312_utf8_to_eci(const testCtx *const p_ctx) { | |
| 218 | |
| 219 struct item { | |
| 220 int eci; | |
| 221 int full_multibyte; | |
| 222 char *data; | |
| 223 int length; | |
| 224 int ret; | |
| 225 int ret_length; | |
| 226 unsigned int expected_gbdata[20]; | |
| 227 char *comment; | |
| 228 }; | |
| 229 /* | |
| 230 é U+00E9 in ISO 8859-1 0xE9, Win 1250 plus other Win, in GRIDMATRIX Chinese mode first byte range 0xA1..A9, 0xB0..F7 | |
| 231 β U+03B2 in ISO 8859-7 Greek 0xE2 (but not other ISO 8859 or Win page) | |
| 232 ¥ U+00A5 in ISO 8859-1 0xA5, in first byte range 0xA1..A9, 0xB0..F7 | |
| 233 NBSP U+00A0 in ISO 8859-1 0xA0, outside first byte and second byte range 0xA1..FE, UTF-8 C2A0 (\302\240) | |
| 234 ¡ U+00A1 in ISO 8859-1 0xA1, in first byte range | |
| 235 © U+00A9 in ISO 8859-1 0xA9, in first byte range | |
| 236 ª U+00AA in ISO 8859-1 0xAA, outside first byte range | |
| 237 ¯ U+00AF in ISO 8859-1 0xAF, outside first byte range | |
| 238 ° U+00B0 in ISO 8859-1 0xB0, in first byte range | |
| 239 ÷ U+00F7 in ISO 8859-1 0xF7, in first byte range | |
| 240 ø U+00F8 in ISO 8859-1 0xF8, outside first byte range | |
| 241 ÿ U+00FF in ISO 8859-1 0xFF, outside first byte and second byte range | |
| 242 */ | |
| 243 /* s/\/\*[ 0-9]*\*\//\=printf("\/\*%3d*\/", line(".") - line("'<")): */ | |
| 244 struct item data[] = { | |
| 245 /* 0*/ { 3, 0, "é", -1, 0, 1, { 0xE9 }, "Not full multibyte" }, | |
| 246 /* 1*/ { 3, 1, "é", -1, 0, 1, { 0xE9 }, "First byte in range but only one byte" }, | |
| 247 /* 2*/ { 3, 0, "β", -1, ZINT_ERROR_INVALID_DATA, -1, {0}, "Not in ECI 3 (ISO 8859-1)" }, | |
| 248 /* 3*/ { 3, 1, "β", -1, ZINT_ERROR_INVALID_DATA, -1, {0}, "Not in ECI 3 (ISO 8859-1)" }, | |
| 249 /* 4*/ { 9, 0, "β", -1, 0, 1, { 0xE2 }, "In ECI 9 (ISO 8859-7)" }, | |
| 250 /* 5*/ { 9, 1, "β", -1, 0, 1, { 0xE2 }, "In ECI 9 (ISO 8859-7)" }, | |
| 251 /* 6*/ { 3, 0, "¥", -1, 0, 1, { 0xA5 }, "Not full multibyte" }, | |
| 252 /* 7*/ { 3, 1, "¥", -1, 0, 1, { 0xA5 }, "First byte in range but only one byte" }, | |
| 253 /* 8*/ { 3, 0, "¡é", -1, 0, 2, { 0xA1, 0xE9 }, "Not full multibyte" }, | |
| 254 /* 9*/ { 3, 1, "¡é", -1, 0, 1, { 0xA1E9 }, "In GRIDMATRIX Chinese mode range" }, | |
| 255 /* 10*/ { 3, 0, "¡\302\240", -1, 0, 2, { 0xA1, 0xA0 }, "Not full multibyte" }, | |
| 256 /* 11*/ { 3, 1, "¡\302\240", -1, 0, 2, { 0xA1, 0xA0 }, "First byte in range but not second" }, | |
| 257 /* 12*/ { 3, 0, "©é", -1, 0, 2, { 0xA9, 0xE9 }, "Not full multibyte" }, | |
| 258 /* 13*/ { 3, 1, "©é", -1, 0, 1, { 0xA9E9 }, "In GRIDMATRIX Chinese mode range" }, | |
| 259 /* 14*/ { 3, 0, "©ÿ", -1, 0, 2, { 0xA9, 0xFF }, "Not full multibyte" }, | |
| 260 /* 15*/ { 3, 1, "©ÿ", -1, 0, 2, { 0xA9, 0xFF }, "First byte in range but not second" }, | |
| 261 /* 16*/ { 3, 0, "éaé驪ª©¯é°°é÷éø", -1, 0, 16, { 0xE9, 0x61, 0xE9, 0xE9, 0xA9, 0xAA, 0xAA, 0xA9, 0xAF, 0xE9, 0xB0, 0xB0, 0xE9, 0xF7, 0xE9, 0xF8 }, "" }, | |
| 262 /* 17*/ { 3, 1, "éaé驪ª©¯é°°é÷éø", -1, 0, 10, { 0xE9, 0x61, 0xE9E9, 0xA9AA, 0xAA, 0xA9AF, 0xE9B0, 0xB0E9, 0xF7E9, 0xF8 }, "" }, | |
| 263 /* 18*/ { 20, 0, "\\\\", -1, 0, 4, { 0x81, 0x5F, 0x81, 0x5F }, "Shift JIS reverse solidus (backslash) mapping from ASCII to double byte" }, | |
| 264 /* 19*/ { 20, 1, "\\\\", -1, 0, 4, { 0x81, 0x5F, 0x81, 0x5F }, "Shift JIS outside GB 2312 Hanzi mode range" }, | |
| 265 /* 20*/ { 20, 0, "爍", -1, 0, 2, { 0xE0, 0xA1 }, "Shift JIS U+720D" }, | |
| 266 /* 21*/ { 20, 1, "爍", -1, 0, 1, { 0xE0A1 }, "Shift JIS in GB 2312 Hanzi mode range" }, | |
| 267 /* 22*/ { 25, 0, "12", -1, 0, 4, { 0x00, 0x31, 0x00, 0x32 }, "UCS-2BE ASCII" }, | |
| 268 /* 23*/ { 25, 0, "", -1, 0, 4, { 0x00, 0x81, 0x00, 0x81 }, "UCS-2BE U+0081" }, | |
| 269 /* 24*/ { 25, 1, "", -1, 0, 4, { 0x00, 0x81, 0x00, 0x81 }, "UCS-2BE outside GB 2312 Hanzi mode range" }, | |
| 270 /* 25*/ { 25, 0, "ꆩꆩ", -1, 0, 4, { 0xA1, 0xA9, 0xA1, 0xA9 }, "UCS-2BE U+A1A9" }, | |
| 271 /* 26*/ { 25, 1, "ꆩꆩ", -1, 0, 2, { 0xA1A9, 0xA1A9 }, "UCS-2BE in GB 2312 Hanzi mode range" }, | |
| 272 /* 27*/ { 25, 0, "膀膀", -1, 0, 4, { 0x81, 0x80, 0x81, 0x80 }, "UCS-2BE U+8180" }, | |
| 273 /* 28*/ { 25, 1, "膀膀", -1, 0, 4, { 0x81, 0x80, 0x81, 0x80 }, "UCS-2BE outside GB 2312 Hanzi mode range (but in GB 18030 range)" }, | |
| 274 /* 29*/ { 28, 0, "¢¢", -1, 0, 4, { 0xA2, 0x46, 0xA2, 0x46 }, "Big5 U+00A2" }, | |
| 275 /* 30*/ { 28, 1, "¢¢", -1, 0, 4, { 0xA2, 0x46, 0xA2, 0x46 }, "Big5 outside GB 2312 Hanzi mode range (but in GB 18030 range)" }, | |
| 276 /* 31*/ { 28, 0, "陛", -1, 0, 2, { 0xB0, 0xA1 }, "Big5 U+965B" }, | |
| 277 /* 32*/ { 28, 1, "陛", -1, 0, 1, { 0xB0A1 }, "Big5 in GB 2312 Hanzi mode range" }, | |
| 278 /* 33*/ { 29, 0, "¨¨", -1, 0, 2, { 0xA1A7, 0xA1A7 }, "GB 2312 U+00A8" }, | |
| 279 /* 34*/ { 29, 1, "¨¨", -1, 0, 2, { 0xA1A7, 0xA1A7 }, "GB 2312" }, | |
| 280 /* 35*/ { 29, 0, "崂", -1, 0, 1, { 0xE1C0 }, "GB 2312 U+5D02" }, | |
| 281 /* 36*/ { 29, 1, "崂", -1, 0, 1, { 0xE1C0 }, "GB 2312" }, | |
| 282 /* 37*/ { 29, 0, "・", -1, 0, 1, { 0xA1A4 }, "GB 2312 U+30FB" }, | |
| 283 /* 38*/ { 29, 1, "・", -1, 0, 1, { 0xA1A4 }, "GB 2312" }, | |
| 284 /* 39*/ { 29, 0, "釦", -1, ZINT_ERROR_INVALID_DATA, -1, {0}, "GB 18030 U+91E6 not in GB 2312" }, | |
| 285 /* 40*/ { 30, 0, "¡¡", -1, 0, 4, { 0x22 + 0x80, 0x2E + 0x80, 0x22 + 0x80, 0x2E + 0x80 }, "EUC-KR U+00A1 (0xA2AE)" }, | |
| 286 /* 41*/ { 30, 1, "¡¡", -1, 0, 2, { 0x222E + 0x8080, 0x222E + 0x8080 }, "EUC-KR 0xA2AE in GB 2312 Hanzi mode range" }, | |
| 287 /* 42*/ { 30, 0, "詰", -1, 0, 2, { 0x7D + 0x80, 0x7E + 0x80 }, "EUC-KR U+8A70 (0xFDFE)" }, | |
| 288 /* 43*/ { 30, 1, "詰", -1, 0, 2, { 0x7D + 0x80, 0x7E + 0x80 }, "EUC-KR 0xFDFE > 0xF7FE so not in GB 2312 Hanzi mode range" }, | |
| 289 }; | |
| 290 int data_size = ARRAY_SIZE(data); | |
| 291 int i, length, ret; | |
| 292 | |
| 293 unsigned int gbdata[20]; | |
| 294 | |
| 295 testStart("test_gb2312_utf8_to_eci"); | |
| 296 | |
| 297 for (i = 0; i < data_size; i++) { | |
| 298 int ret_length; | |
| 299 | |
| 300 if (testContinue(p_ctx, i)) continue; | |
| 301 | |
| 302 length = data[i].length == -1 ? (int) strlen(data[i].data) : data[i].length; | |
| 303 ret_length = length; | |
| 304 | |
| 305 ret = gb2312_utf8_to_eci(data[i].eci, (unsigned char *) data[i].data, &ret_length, gbdata, data[i].full_multibyte); | |
| 306 assert_equal(ret, data[i].ret, "i:%d ret %d != %d\n", i, ret, data[i].ret); | |
| 307 if (ret == 0) { | |
| 308 int j; | |
| 309 assert_equal(ret_length, data[i].ret_length, "i:%d ret_length %d != %d\n", i, ret_length, data[i].ret_length); | |
| 310 for (j = 0; j < (int) ret_length; j++) { | |
| 311 assert_equal(gbdata[j], data[i].expected_gbdata[j], "i:%d gbdata[%d] 0x%04X != 0x%04X\n", i, j, gbdata[j], data[i].expected_gbdata[j]); | |
| 312 } | |
| 313 } | |
| 314 } | |
| 315 | |
| 316 testFinish(); | |
| 317 } | |
| 318 | |
| 319 INTERNAL void gb2312_cpy_test(const unsigned char source[], int *p_length, unsigned int *ddata, | |
| 320 const int full_multibyte); | |
| 321 | |
| 322 static void test_gb2312_cpy(const testCtx *const p_ctx) { | |
| 323 | |
| 324 struct item { | |
| 325 int full_multibyte; | |
| 326 char *data; | |
| 327 int length; | |
| 328 int ret; | |
| 329 int ret_length; | |
| 330 unsigned int expected_gbdata[20]; | |
| 331 char *comment; | |
| 332 }; | |
| 333 /* s/\/\*[ 0-9]*\*\//\=printf("\/\*%3d*\/", line(".") - line("'<")): */ | |
| 334 struct item data[] = { | |
| 335 /* 0*/ { 0, "\351", -1, 0, 1, { 0xE9 }, "Not full multibyte" }, | |
| 336 /* 1*/ { 1, "\351", -1, 0, 1, { 0xE9 }, "In GRIDMATRIX Chinese mode first-byte range but only one byte" }, | |
| 337 /* 2*/ { 0, "\351\241", -1, 0, 2, { 0xE9, 0xA1 }, "Not full multibyte" }, | |
| 338 /* 3*/ { 1, "\351\241", -1, 0, 1, { 0xE9A1 }, "In GRIDMATRIX Chinese range" }, | |
| 339 /* 4*/ { 0, "\241", -1, 0, 1, { 0xA1 }, "Not full multibyte" }, | |
| 340 /* 5*/ { 1, "\241", -1, 0, 1, { 0xA1 }, "In first-byte range but only one byte" }, | |
| 341 /* 6*/ { 0, "\241\241", -1, 0, 2, { 0xA1, 0xA1 }, "Not full multibyte" }, | |
| 342 /* 7*/ { 1, "\241\241", -1, 0, 1, { 0xA1A1 }, "In range" }, | |
| 343 /* 8*/ { 0, "\241\240\241\376\367\376\367\377", -1, 0, 8, { 0xA1, 0xA0, 0xA1, 0xFE, 0xF7, 0xFE, 0xF7, 0xFF }, "" }, | |
| 344 /* 9*/ { 1, "\241\240\241\376\367\376\367\377", -1, 0, 6, { 0xA1, 0xA0, 0xA1FE, 0xF7FE, 0xF7, 0xFF }, "" }, | |
| 345 }; | |
| 346 int data_size = ARRAY_SIZE(data); | |
| 347 int i, length; | |
| 348 | |
| 349 unsigned int gbdata[20]; | |
| 350 | |
| 351 testStart("test_gb2312_cpy"); | |
| 352 | |
| 353 for (i = 0; i < data_size; i++) { | |
| 354 int ret_length; | |
| 355 int j; | |
| 356 | |
| 357 if (testContinue(p_ctx, i)) continue; | |
| 358 | |
| 359 length = data[i].length == -1 ? (int) strlen(data[i].data) : data[i].length; | |
| 360 ret_length = length; | |
| 361 | |
| 362 gb2312_cpy_test((unsigned char *) data[i].data, &ret_length, gbdata, data[i].full_multibyte); | |
| 363 assert_equal(ret_length, data[i].ret_length, "i:%d ret_length %d != %d\n", i, ret_length, data[i].ret_length); | |
| 364 for (j = 0; j < (int) ret_length; j++) { | |
| 365 assert_equal(gbdata[j], data[i].expected_gbdata[j], "i:%d gbdata[%d] %04X != %04X\n", i, j, gbdata[j], data[i].expected_gbdata[j]); | |
| 366 } | |
| 367 } | |
| 368 | |
| 369 testFinish(); | |
| 370 } | |
| 371 | |
| 372 #define TEST_PERF_ITER_MILLES 100 | |
| 373 #define TEST_PERF_ITERATIONS (TEST_PERF_ITER_MILLES * 1000) | |
| 374 | |
| 375 /* Not a real test, just performance indicator */ | |
| 376 static void test_perf(const testCtx *const p_ctx) { | |
| 377 int debug = p_ctx->debug; | |
| 378 | |
| 379 struct item { | |
| 380 char *data; | |
| 381 int ret; | |
| 382 | |
| 383 char *comment; | |
| 384 }; | |
| 385 struct item data[] = { | |
| 386 /* 0*/ { "1234567890", 0, "10 numerics" }, | |
| 387 /* 1*/ { "条码北京條碼པེ་ཅིང།バーコード바코드", 0, "Small various code pages" }, | |
| 388 /* 2*/ { "Summer Palace Ticket for 6 June 2015 13:00;2015年6月6日夜01時00分PM頤和園のチケット;2015년6월6일13시오후여름궁전티켓.2015年6月6号下午13:00的颐和园门票;", 0, "Small mixed ASCII/Hanzi" }, | |
| 389 /* 3*/ { "汉信码标准\015\012中国物品编码中心\015\012北京网路畅想科技发展有限公司\015\012张成海、赵楠、黄燕滨、罗秋科、王毅、张铎、王越\015\012施煜、边峥、修兴强\015\012汉信码标准\015\012中国物品编码中心\015\012北京网路畅想科技发展有限公司", 0, "Bigger mixed" }, | |
| 390 }; | |
| 391 int data_size = ARRAY_SIZE(data); | |
| 392 int i, length, ret; | |
| 393 | |
| 394 struct zint_symbol symbol = {0}; | |
| 395 int ret_length; | |
| 396 #ifdef TEST_JUST_SAY_GNO | |
| 397 int ret_length2; | |
| 398 #endif | |
| 399 unsigned int ddata[8192]; | |
| 400 unsigned char dest[8192]; | |
| 401 int ret2 = 0; | |
| 402 #ifdef TEST_JUST_SAY_GNO | |
| 403 unsigned int ddata2[8192]; | |
| 404 unsigned char dest2[8192]; | |
| 405 #endif | |
| 406 | |
| 407 clock_t start; | |
| 408 clock_t total = 0, total_gno = 0, total_eci = 0, total_eci_gno = 0; | |
| 409 clock_t diff, diff_gno, diff_eci, diff_eci_gno; | |
| 410 int comment_max = 0; | |
| 411 | |
| 412 if (!(debug & ZINT_DEBUG_TEST_PERFORMANCE)) { /* -d 256 */ | |
| 413 return; | |
| 414 } | |
| 415 | |
| 416 for (i = 0; i < data_size; i++) if ((int) strlen(data[i].comment) > comment_max) comment_max = (int) strlen(data[i].comment); | |
| 417 | |
| 418 printf("Iterations %d\n", TEST_PERF_ITERATIONS); | |
| 419 | |
| 420 for (i = 0; i < data_size; i++) { | |
| 421 int j; | |
| 422 | |
| 423 if (testContinue(p_ctx, i)) continue; | |
| 424 | |
| 425 length = (int) strlen(data[i].data); | |
| 426 | |
| 427 diff = diff_gno = diff_eci = diff_eci_gno = 0; | |
| 428 | |
| 429 for (j = 0; j < TEST_PERF_ITERATIONS; j++) { | |
| 430 ret_length = length; | |
| 431 | |
| 432 start = clock(); | |
| 433 ret = gb2312_utf8(&symbol, (unsigned char *) data[i].data, &ret_length, ddata); | |
| 434 diff += clock() - start; | |
| 435 | |
| 436 #ifdef TEST_JUST_SAY_GNO | |
| 437 ret_length2 = length; | |
| 438 start = clock(); | |
| 439 ret2 = gb2312_utf8_wctomb(&symbol, (unsigned char *) data[i].data, &ret_length2, ddata2); | |
| 440 diff_gno += clock() - start; | |
| 441 #endif | |
| 442 | |
| 443 ret_length = length; | |
| 444 | |
| 445 start = clock(); | |
| 446 (void)utf8_to_eci(29, (unsigned char *) data[i].data, dest, &ret_length); | |
| 447 diff_eci += clock() - start; | |
| 448 | |
| 449 #ifdef TEST_JUST_SAY_GNO | |
| 450 ret_length2 = length; | |
| 451 start = clock(); | |
| 452 (void)utf8_to_eci_wctomb(29, (unsigned char *) data[i].data, dest2, &ret_length2); | |
| 453 diff_eci_gno += clock() - start; | |
| 454 #endif | |
| 455 } | |
| 456 assert_equal(ret, ret2, "i:%d ret %d != ret2 %d\n", (int) i, ret, ret2); | |
| 457 | |
| 458 printf("%*s: new % 8gms, gno % 8gms ratio % 9g | eci % 8gms, gno % 8gms ratio %g\n", comment_max, data[i].comment, | |
| 459 TEST_PERF_TIME(diff), TEST_PERF_TIME(diff_gno), TEST_PERF_RATIO(diff, diff_gno), | |
| 460 TEST_PERF_TIME(diff_eci), TEST_PERF_TIME(diff_eci_gno), TEST_PERF_RATIO(diff_eci, diff_eci_gno)); | |
| 461 | |
| 462 total += diff; | |
| 463 total_gno += diff_gno; | |
| 464 total_eci += diff_eci; | |
| 465 total_eci_gno += diff_eci_gno; | |
| 466 } | |
| 467 if (p_ctx->index == -1) { | |
| 468 printf("%*s: new % 8gms, gno % 8gms ratio % 9g | eci % 8gms, gno % 8gms ratio %g\n", comment_max, "totals", | |
| 469 TEST_PERF_TIME(total), TEST_PERF_TIME(total_gno), TEST_PERF_RATIO(total, total_gno), | |
| 470 TEST_PERF_TIME(total_eci), TEST_PERF_TIME(total_eci_gno), TEST_PERF_RATIO(total_eci, total_eci_gno)); | |
| 471 } | |
| 472 } | |
| 473 | |
| 474 int main(int argc, char *argv[]) { | |
| 475 | |
| 476 testFunction funcs[] = { /* name, func */ | |
| 477 { "test_u_gb2312_int", test_u_gb2312_int }, | |
| 478 { "test_gb2312_utf8", test_gb2312_utf8 }, | |
| 479 { "test_gb2312_utf8_to_eci", test_gb2312_utf8_to_eci }, | |
| 480 { "test_gb2312_cpy", test_gb2312_cpy }, | |
| 481 { "test_perf", test_perf }, | |
| 482 }; | |
| 483 | |
| 484 testRun(argc, argv, funcs, ARRAY_SIZE(funcs)); | |
| 485 | |
| 486 testReport(); | |
| 487 | |
| 488 return 0; | |
| 489 } | |
| 490 | |
| 491 /* vim: set ts=4 sw=4 et : */ |
