comparison mupdf-source/thirdparty/zint/backend/tests/test_gb18030.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 /*
2 libzint - the open source barcode library
3 Copyright (C) 2019-2023 Robin Stuart <rstuart114@gmail.com>
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions
7 are met:
8
9 1. Redistributions of source code must retain the above copyright
10 notice, this list of conditions and the following disclaimer.
11 2. Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
14 3. Neither the name of the project nor the names of its contributors
15 may be used to endorse or promote products derived from this software
16 without specific prior written permission.
17
18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
19 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
22 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 SUCH DAMAGE.
29 */
30 /* SPDX-License-Identifier: BSD-3-Clause */
31
32 #include "testcommon.h"
33 #include "test_gb18030_tab.h"
34 #include "test_gbk_tab.h"
35 #include "../eci.h"
36 /* For local "private" testing using previous libiconv adaptation, not included for licensing reasons */
37 #if 0
38 #define TEST_JUST_SAY_GNO */
39 #endif
40 #ifdef TEST_JUST_SAY_GNO
41 #include "../just_say_gno/gb18030_gnu.c"
42 #include "../just_say_gno/gb2312_gnu.c"
43 #endif
44
45 INTERNAL int u_gb18030_int_test(const unsigned int u, unsigned int *dest1, unsigned int *dest2);
46
47 /* As control convert to GB 18030 using table generated from GB18030.TXT plus simple processing.
48 The version of GB18030.TXT is jdk-1.4.2/GB18030.TXT taken from
49 https://haible.de/bruno/charsets/conversion-tables/GB18030.html
50 The generated file backend/tests/test_gb18030_tab.h does not include U+10000..10FFFF codepoints to save space.
51 See also backend/tests/tools/data/GB18030.TXT.README and backend/tests/tools/gen_test_tab.php.
52 */
53 static int u_gb18030_int2(unsigned int u, unsigned int *dest1, unsigned int *dest2) {
54 unsigned int c;
55 int tab_length, start_i, end_i;
56 int i;
57
58 /* GB18030 two-byte extension */
59 if (u == 0x1E3F) { /* GB 18030-2005 change, was PUA U+E7C7 below, see Table 3-39, p.111, Lunde 2nd ed. */
60 *dest1 = 0xA8BC;
61 return 2;
62 }
63 /* GB18030 four-byte extension */
64 if (u == 0xE7C7) { /* PUA */
65 *dest1 = 0x8135;
66 *dest2 = 0xF437;
67 return 4;
68 }
69 /* GB18030 two-byte extension */
70 if (u >= 0x9FB4 && u <= 0x9FBB) { /* GB 18030-2005 change, were PUA, see Table 3-37, p.108, Lunde 2nd ed. */
71 if (u == 0x9FB4) {
72 *dest1 = 0xFE59;
73 } else if (u == 0x9FB5) {
74 *dest1 = 0xFE61;
75 } else if (u == 0x9FB6 || u == 0x9FB7) {
76 *dest1 = 0xFE66 + (u - 0x9FB6);
77 } else if (u == 0x9FB8) {
78 *dest1 = 0xFE6D;
79 } else if (u == 0x9FB9) {
80 *dest1 = 0xFE7E;
81 } else if (u == 0x9FBA) {
82 *dest1 = 0xFE90;
83 } else {
84 *dest1 = 0xFEA0;
85 }
86 return 2;
87 }
88 /* GB18030 two-byte extension */
89 if (u >= 0xFE10 && u <= 0xFE19) { /* GB 18030-2005 change, were PUA, see Table 3-37, p.108, Lunde 2nd ed. */
90 if (u == 0xFE10) {
91 *dest1 = 0xA6D9;
92 } else if (u == 0xFE11) {
93 *dest1 = 0xA6DB;
94 } else if (u == 0xFE12) {
95 *dest1 = 0xA6DA;
96 } else if (u >= 0xFE13 && u <= 0xFE16) {
97 *dest1 = 0xA6DC + (u - 0xFE13);
98 } else if (u == 0xFE17 || u == 0xFE18) {
99 *dest1 = 0xA6EC + (u - 0xFE17);
100 } else {
101 *dest1 = 0xA6F3;
102 }
103 return 2;
104 }
105 /* GB18030 four-byte extension */
106 if (u >= 0xFE1A && u <= 0xFE2F) { /* These are Vertical Forms (U+FE1A..FE1F unassigned) and Combining Half Marks (U+FE20..FE2F) */
107 if (u >= 0xFE1A && u <= 0xFE1D) {
108 c = 0x84318336 + (u - 0xFE1A);
109 } else if (u >= 0xFE1E && u <= 0xFE27) {
110 c = 0x84318430 + (u - 0xFE1E);
111 } else {
112 c = 0x84318530 + (u - 0xFE28);
113 }
114 *dest1 = c >> 16;
115 *dest2 = c & 0xFFFF;
116 return 4;
117 }
118 /* GB18030 */
119 /* Code set 3 (Unicode U+10000..U+10FFFF) */
120 if (u >= 0x10000 /*&& u < 0x10400*/) { /* Not being called for U+10400..U+10FFFF */
121 c = u - 0x10000;
122 *dest1 = 0x9030;
123 *dest2 = 0x8130 + (c % 10) + 0x100 * (c / 10);
124 return 4;
125 }
126
127 tab_length = ARRAY_SIZE(test_gb18030_tab);
128 start_i = test_gb18030_tab_ind[u >> 10];
129 end_i = start_i + 0x800 > tab_length ? tab_length : start_i + 0x800;
130 for (i = start_i; i < end_i; i += 2) {
131 if (test_gb18030_tab[i + 1] == u) {
132 c = test_gb18030_tab[i];
133 if (c <= 0xFFFF) {
134 *dest1 = c;
135 return c <= 0xFF ? 1 : 2;
136 }
137 *dest1 = c >> 16;
138 *dest2 = c & 0xFFFF;
139 return 4;
140 }
141 }
142 return 0;
143 }
144
#include <time.h>

/* Converts a `clock_t` tick count `arg` to milliseconds (as a double) */
#define TEST_PERF_TIME(arg)         (((arg) * 1000.0) / CLOCKS_PER_SEC)
/* Ratio of tick counts `a1` to `a2`, or 0 if `a2` is zero (guards the division).
   `a2` is parenthesized in the condition so that any expression may be passed safely */
#define TEST_PERF_RATIO(a1, a2)     ((a2) ? TEST_PERF_TIME(a1) / TEST_PERF_TIME(a2) : 0)

#ifdef TEST_JUST_SAY_GNO
/* Number of times each codepoint conversion is repeated when timing in `test_u_gb18030_int()` */
#define TEST_INT_PERF_ITERATIONS    250
#endif
153
154 static void test_u_gb18030_int(const testCtx *const p_ctx) {
155 int debug = p_ctx->debug;
156
157 int ret, ret2;
158 unsigned int val1_1, val1_2, val2_1, val2_2;
159 unsigned int i;
160 /* See: https://file.allitebooks.com/20160708/CJKV%20Information%20Processing.pdf (table 3-37, p.109, 2nd ed.) */
161 static const int nonpua_nonbmp[] = {
162 0x20087, 0x20089, 0x200CC, 0x215D7, 0x2298F, 0x241FE
163 };
164 static const unsigned int nonpua_nonbmp_vals[] = {
165 0xFE51, 0xFE52, 0xFE53, 0xFE6C, 0xFE76, 0xFE91
166 };
167
168 #ifdef TEST_JUST_SAY_GNO
169 int j;
170 clock_t start;
171 clock_t total = 0, total_gno = 0;
172 #else
173 (void)debug;
174 #endif
175
176 testStart("test_u_gb18030_int");
177
178 #ifdef TEST_JUST_SAY_GNO
179 if ((debug & ZINT_DEBUG_TEST_PERFORMANCE)) { /* -d 256 */
180 printf("test_u_gb18030_int perf iterations: %d\n", TEST_INT_PERF_ITERATIONS);
181 }
182 #endif
183
184 for (i = 0; i < 0x10400; i++) { /* Don't bother with U+10400..U+10FFFF, programmatically filled */
185 if (i >= 0xD800 && i <= 0xDFFF) { /* UTF-16 surrogates */
186 continue;
187 }
188 if (testContinue(p_ctx, i)) continue;
189 val1_1 = val1_2 = val2_1 = val2_2 = 0;
190 ret = u_gb18030_int_test(i, &val1_1, &val1_2);
191 ret2 = u_gb18030_int2(i, &val2_1, &val2_2);
192 assert_equal(ret, ret2, "i:%d 0x%04X ret %d != ret2 %d, val1_1 0x%04X, val2_1 0x%04X, val1_2 0x%04X, val2_2 0x%04X\n", (int) i, i, ret, ret2, val1_1, val2_1, val1_2, val2_2);
193 if (ret2) {
194 assert_equal(val1_1, val2_1, "i:%d 0x%04X val1_1 0x%04X != val2_1 0x%04X\n", (int) i, i, val1_1, val2_1);
195 assert_equal(val1_2, val2_2, "i:%d 0x%04X val1_2 0x%04X != val2_2 0x%04X\n", (int) i, i, val1_2, val2_2);
196 }
197 #ifdef TEST_JUST_SAY_GNO
198 if (!(debug & ZINT_DEBUG_TEST_PERFORMANCE)) { /* -d 256 */
199 val2_1 = val2_2 = 0;
200 ret2 = gb18030_wctomb_zint(&val2_1, &val2_2, i);
201 } else {
202 for (j = 0; j < TEST_INT_PERF_ITERATIONS; j++) {
203 val1_1 = val1_2 = val2_1 = val2_2 = 0;
204
205 start = clock();
206 ret = u_gb18030_int_test(i, &val1_1, &val1_2);
207 total += clock() - start;
208
209 start = clock();
210 ret2 = gb18030_wctomb_zint(&val2_1, &val2_2, i);
211 total_gno += clock() - start;
212 }
213 }
214
215 assert_equal(ret, ret2, "i:%d 0x%04X ret %d != ret2 %d, val1_1 0x%04X, val2_1 0x%04X, val1_2 0x%04X, val2_2 0x%04X\n", (int) i, i, ret, ret2, val1_1, val2_1, val1_2, val2_2);
216 if (ret2) {
217 assert_equal(val1_1, val2_1, "i:%d 0x%04X val1_1 0x%04X != val2_1 0x%04X\n", (int) i, i, val1_1, val2_1);
218 assert_equal(val1_2, val2_2, "i:%d 0x%04X val1_2 0x%04X != val2_2 0x%04X\n", (int) i, i, val1_2, val2_2);
219 }
220 #endif
221 }
222
223 /* u_gb18030() assumes valid Unicode so now returns a nonsense value here */
224 val1_1 = val1_2 = 0;
225 ret = u_gb18030_int_test(0x110000, &val1_1, &val1_2); /* Invalid Unicode codepoint */
226 assert_equal(ret, 4, "0x110000 ret %d != 4, val1_1 0x%04X, val1_2 0x%04X\n", ret, val1_1, val1_2);
227
228 for (i = 0; i < ARRAY_SIZE(nonpua_nonbmp); i++) {
229 val1_1 = val1_2 = 0;
230 ret = u_gb18030_int_test(nonpua_nonbmp[i], &val1_1, &val1_2);
231 assert_equal(ret, 2, "i:%d 0x%04X ret %d != 2, val1_1 0x%04X, val1_2 0x%04X\n", (int) i, nonpua_nonbmp[i], ret, val1_1, val1_2);
232 assert_equal(val1_1, nonpua_nonbmp_vals[i], "i:%d 0x%04X val1_1 0x%04X != 0x%04X\n", (int) i, nonpua_nonbmp[i], val1_1, nonpua_nonbmp_vals[i]);
233 assert_zero(val1_2, "i:%d 0x%04X val1_2 0x%04X != 0\n", (int) i, nonpua_nonbmp[i], val1_2);
234 }
235
236 #ifdef TEST_JUST_SAY_GNO
237 if ((debug & ZINT_DEBUG_TEST_PERFORMANCE)) { /* -d 256 */
238 printf("test_u_gb18030_int perf totals: new % 8gms, gno % 8gms ratio %g\n",
239 TEST_PERF_TIME(total), TEST_PERF_TIME(total_gno), TEST_PERF_RATIO(total, total_gno));
240 }
241 #endif
242
243 testFinish();
244 }
245
246 static void test_gb18030_utf8(const testCtx *const p_ctx) {
247
248 struct item {
249 char *data;
250 int length;
251 int ret;
252 int ret_length;
253 unsigned int expected_gbdata[30];
254 char *comment;
255 };
256 /*
257 é U+00E9 in ISO 8859-1 plus other ISO 8859 (but not in ISO 8859-7 or ISO 8859-11), Win 1250 plus other Win, in GB 18030 0xA8A6, UTF-8 C3A9
258 β U+03B2 in ISO 8859-7 Greek (but not other ISO 8859 or Win page), in GB 18030 0xA6C2, UTF-8 CEB2
259 ¤ U+00A4 in ISO 8859-1 plus other ISO 8859 (but not in ISO 8859-7 or ISO 8859-11), Win 1250 plus other Win, in GB 18030 0xA1E8, UTF-8 C2A4
260 ¥ U+00A5 in ISO 8859-1 0xA5, in GB 18030 4-byte 0x81308436, UTF-8 C2A5
261 ・ U+30FB katakana middle dot, not in any ISO or Win page, in GB 18030 0xA1A4, duplicate of mapping of U+00B7, UTF-8 E383BB
262 · U+00B7 middle dot in ISO 8859-1 0xB7, in GB 18030 "GB 18030 subset" 0xA1A4, duplicate of mapping of U+30FB, UTF-8 C2B7
263 ― U+2015 horizontal bar in ISO 8859-7 Greek and ISO 8859-10 Nordic, not in any Win page, in GB 18030 "GB18030.TXT" 0xA1AA, duplicate of mapping of U+2014, UTF-8 E28095
264 — U+2014 em dash, not in any ISO, in Win 1250 and other Win, in GB 18030 "GB 18030 subset" 0xA1AA, duplicate of mapping of U+2015, UTF-8 E28094
265 */
266 /* s/\/\*[ 0-9]*\*\//\=printf("\/\*%3d*\/", line(".") - line("'<")): */
267 struct item data[] = {
268 /* 0*/ { "é", -1, 0, 1, { 0xA8A6 }, "" },
269 /* 1*/ { "β", -1, 0, 1, { 0xA6C2 }, "" },
270 /* 2*/ { "¤", -1, 0, 1, { 0xA1E8 }, "" },
271 /* 3*/ { "¥", -1, 0, 2, { 0x8130, 0x8436 }, "0x81308436" },
272 /* 4*/ { "・", -1, 0, 2, { 0x8139, 0xA739 }, "" },
273 /* 5*/ { "·", -1, 0, 1, { 0xA1A4 }, "GB 18030 subset mapping" },
274 /* 6*/ { "―", -1, 0, 1, { 0xA844 }, "GB18030.TXT mapping" },
275 /* 7*/ { "—", -1, 0, 1, { 0xA1AA }, "GB 18030 subset mapping" },
276 /* 8*/ { "aβc・·—é—Z", -1, 0, 10, { 'a', 0xA6C2, 'c', 0x8139, 0xA739, 0xA1A4, 0xA1AA, 0xA8A6, 0xA1AA, 'Z' }, "" },
277 /* 9*/ { "\200", -1, ZINT_ERROR_INVALID_DATA, -1, {0}, "Invalid UTF-8" },
278 /* 10*/ { "\357\277\276", -1, 0, 2, { 0x8431, 0xA438 }, "U+FFFE (reversed BOM)" },
279 /* 11*/ { "\357\277\277", -1, 0, 2, { 0x8431, 0xA439 }, "U+FFFF" },
280 };
281 int data_size = ARRAY_SIZE(data);
282 int i, length, ret;
283
284 struct zint_symbol symbol = {0};
285 unsigned int gbdata[30];
286
287 testStart("test_gb18030_utf8");
288
289 for (i = 0; i < data_size; i++) {
290 int ret_length;
291
292 if (testContinue(p_ctx, i)) continue;
293
294 length = data[i].length == -1 ? (int) strlen(data[i].data) : data[i].length;
295 ret_length = length;
296
297 ret = gb18030_utf8(&symbol, (unsigned char *) data[i].data, &ret_length, gbdata);
298 assert_equal(ret, data[i].ret, "i:%d ret %d != %d (%s)\n", i, ret, data[i].ret, symbol.errtxt);
299 if (ret == 0) {
300 int j;
301 assert_equal(ret_length, data[i].ret_length, "i:%d ret_length %d != %d\n", i, ret_length, data[i].ret_length);
302 for (j = 0; j < (int) ret_length; j++) {
303 assert_equal(gbdata[j], data[i].expected_gbdata[j], "i:%d gbdata[%d] 0x%04X != 0x%04X\n", i, j, gbdata[j], data[i].expected_gbdata[j]);
304 }
305 }
306 }
307
308 testFinish();
309 }
310
311 static void test_gb18030_utf8_to_eci(const testCtx *const p_ctx) {
312
313 struct item {
314 int eci;
315 int full_multibyte;
316 char *data;
317 int length;
318 int ret;
319 int ret_length;
320 unsigned int expected_gbdata[30];
321 char *comment;
322 };
323 /*
324 é U+00E9 in ISO 8859-1 0xE9, Win 1250 plus other Win, in HANXIN Chinese mode first byte range 0x81..FE
325 β U+03B2 in ISO 8859-7 Greek 0xE2 (but not other ISO 8859 or Win page)
326 ¥ U+00A5 in ISO 8859-1 0xA5, in first byte range 0x81..FE
327 ÿ U+00FF in ISO 8859-1 0xFF, outside first byte and second/third/fourth byte ranges
328 @ U+0040 in ASCII 0x40, outside first byte range, in double-byte second byte range, outside quad-byte second/third/fourth byte ranges
329 9 U+0039 in ASCII 0x39, outside first byte range, outside double-byte second byte range and quad-byte third byte range, in quad-byte second/fourth byte ranges
330 */
331 /* s/\/\*[ 0-9]*\*\//\=printf("\/\*%3d*\/", line(".") - line("'<")): */
332 struct item data[] = {
333 /* 0*/ { 3, 0, "é", -1, 0, 1, { 0xE9 }, "Not full multibyte" },
334 /* 1*/ { 3, 1, "é", -1, 0, 1, { 0xE9 }, "First byte in range but only one byte" },
335 /* 2*/ { 3, 0, "β", -1, ZINT_ERROR_INVALID_DATA, -1, {0}, "Not full multibyte" },
336 /* 3*/ { 3, 1, "β", -1, ZINT_ERROR_INVALID_DATA, -1, {0}, "Not in ECI 3 (ISO 8859-1)" },
337 /* 4*/ { 9, 0, "β", -1, 0, 1, { 0xE2 }, "Not full multibyte" },
338 /* 5*/ { 9, 1, "β", -1, 0, 1, { 0xE2 }, "In ECI 9 (ISO 8859-7)" },
339 /* 6*/ { 3, 0, "¥", -1, 0, 1, { 0xA5 }, "Not full multibyte" },
340 /* 7*/ { 3, 1, "¥", -1, 0, 1, { 0xA5 }, "First byte in range but only one byte" },
341 /* 8*/ { 3, 0, "¥é", -1, 0, 2, { 0xA5, 0xE9 }, "Not full multibyte" },
342 /* 9*/ { 3, 1, "¥é", -1, 0, 1, { 0xA5E9 }, "In double-byte range" },
343 /* 10*/ { 3, 0, "¥ÿ", -1, 0, 2, { 0xA5, 0xFF }, "Not full multibyte" },
344 /* 11*/ { 3, 1, "¥ÿ", -1, 0, 2, { 0xA5, 0xFF }, "First byte in range but not second" },
345 /* 12*/ { 3, 0, "¥9é9", -1, 0, 4, { 0xA5, 0x39, 0xE9, 0x39 }, "Not full multibyte" },
346 /* 13*/ { 3, 1, "¥9é9", -1, 0, 2, { 0xA539, 0xE939 }, "In quad-byte range" },
347 /* 14*/ { 3, 0, "¥9", -1, 0, 2, { 0xA5, 0x39 }, "Not full multibyte" },
348 /* 15*/ { 3, 1, "¥9", -1, 0, 2, { 0xA5, 0x39 }, "In quad-byte first/second range but only 2 bytes, not in double-byte range" },
349 /* 16*/ { 3, 0, "¥9é", -1, 0, 3, { 0xA5, 0x39, 0xE9 }, "Not full multibyte" },
350 /* 17*/ { 3, 1, "¥9é", -1, 0, 3, { 0xA5, 0x39, 0xE9 }, "In quad-byte first/second/third range but only 3 bytes, no bytes in double-byte range" },
351 /* 18*/ { 3, 0, "¥9é@", -1, 0, 4, { 0xA5, 0x39, 0xE9, 0x40 }, "Not full multibyte" },
352 /* 19*/ { 3, 1, "¥9é@", -1, 0, 3, { 0xA5, 0x39, 0xE940 }, "In quad-byte first/second/third range but not fourth, second 2 bytes in double-byte range" },
353 /* 20*/ { 3, 0, "¥@é9", -1, 0, 4, { 0xA5, 0x40, 0xE9, 0x39 }, "Not full multibyte" },
354 /* 21*/ { 3, 1, "¥@é9", -1, 0, 3, { 0xA540, 0xE9, 0x39 }, "In quad-byte first/third/fourth range but not second, first 2 bytes in double-byte range" },
355 /* 22*/ { 3, 0, "¥9@9", -1, 0, 4, { 0xA5, 0x39, 0x40, 0x39 }, "Not full multibyte" },
356 /* 23*/ { 3, 1, "¥9@9", -1, 0, 4, { 0xA5, 0x39, 0x40, 0x39 }, "In quad-byte first/second/fourth range but not third, no bytes in double-byte range" },
357 /* 24*/ { 3, 0, "é9éé¥9é@¥9é9¥9é0é@@¥¥é0é1", -1, 0, 25, { 0xE9, 0x39, 0xE9, 0xE9, 0xA5, 0x39, 0xE9, 0x40, 0xA5, 0x39, 0xE9, 0x39, 0xA5, 0x39, 0xE9, 0x30, 0xE9, 0x40, 0x40, 0xA5, 0xA5, 0xE9, 0x30, 0xE9, 0x31 }, "" },
358 /* 25*/ { 3, 1, "é9éé¥9é@¥9é9¥9é0é@@¥¥é0é1", -1, 0, 15, { 0xE9, 0x39, 0xE9E9, 0xA5, 0x39, 0xE940, 0xA539, 0xE939, 0xA539, 0xE930, 0xE940, 0x40, 0xA5A5, 0xE930, 0xE931 }, "" },
359 /* 26*/ { 20, 0, "\\\\", -1, 0, 4, { 0x81, 0x5F, 0x81, 0x5F }, "Shift JIS reverse solidus (backslash) mapping from ASCII to double byte" },
360 /* 27*/ { 20, 1, "\\\\", -1, 0, 2, { 0x815F, 0x815F }, "Shift JIS in GB 18030 Hanzi mode range" },
361 /* 28*/ { 20, 0, "爍", -1, 0, 2, { 0xE0, 0xA1 }, "Shift JIS U+720D" },
362 /* 29*/ { 20, 1, "爍", -1, 0, 1, { 0xE0A1 }, "Shift JIS in GB 18030 Hanzi mode range" },
363 /* 30*/ { 25, 0, "12", -1, 0, 4, { 0x00, 0x31, 0x00, 0x32 }, "UCS-2BE ASCII" },
364 /* 31*/ { 25, 0, "", -1, 0, 4, { 0x00, 0x81, 0x00, 0x81 }, "UCS-2BE U+0081" },
365 /* 32*/ { 25, 1, "", -1, 0, 4, { 0x00, 0x81, 0x00, 0x81 }, "UCS-2BE outside GB 18030 Hanzi mode range" },
366 /* 33*/ { 25, 0, "ꆩꆩ", -1, 0, 4, { 0xA1, 0xA9, 0xA1, 0xA9 }, "UCS-2BE U+A1A9" },
367 /* 34*/ { 25, 1, "ꆩꆩ", -1, 0, 2, { 0xA1A9, 0xA1A9 }, "UCS-2BE in GB 18030 Hanzi mode range" },
368 /* 35*/ { 25, 0, "膀膀", -1, 0, 4, { 0x81, 0x80, 0x81, 0x80 }, "UCS-2BE U+8180" },
369 /* 36*/ { 25, 1, "膀膀", -1, 0, 2, { 0x8180, 0x8180 }, "UCS-2BE in GB 18030 Hanzi mode range (but outside GB 2312 range)" },
370 /* 37*/ { 28, 0, "¢¢", -1, 0, 4, { 0xA2, 0x46, 0xA2, 0x46 }, "Big5 U+00A2" },
371 /* 38*/ { 28, 1, "¢¢", -1, 0, 2, { 0xA246, 0xA246 }, "Big5 in GB 18030 Hanzi mode range (but outside GB 2312 range)" },
372 /* 39*/ { 28, 0, "陛", -1, 0, 2, { 0xB0, 0xA1 }, "Big5 U+965B" },
373 /* 40*/ { 28, 1, "陛", -1, 0, 1, { 0xB0A1 }, "Big5 in GB 18030 Hanzi mode range" },
374 /* 41*/ { 29, 0, "¨¨", -1, 0, 2, { 0xA1A7, 0xA1A7 }, "GB 2312 U+00A8" },
375 /* 42*/ { 29, 1, "¨¨", -1, 0, 2, { 0xA1A7, 0xA1A7 }, "GB 2312" },
376 /* 43*/ { 29, 0, "崂", -1, 0, 1, { 0xE1C0 }, "GB 2312 U+5D02" },
377 /* 44*/ { 29, 1, "崂", -1, 0, 1, { 0xE1C0 }, "GB 2312" },
378 /* 45*/ { 29, 0, "・", -1, 0, 1, { 0xA1A4 }, "GB 2312 U+30FB" },
379 /* 46*/ { 29, 1, "・", -1, 0, 1, { 0xA1A4 }, "GB 2312" },
380 /* 47*/ { 29, 0, "釦", -1, ZINT_ERROR_INVALID_DATA, -1, {0}, "GB 18030 U+91E6 not in GB 2312" },
381 /* 48*/ { 30, 0, "¡¡", -1, 0, 4, { 0x22 + 0x80, 0x2E + 0x80, 0x22 + 0x80, 0x2E + 0x80 }, "EUC-KR U+00A1 (0xA2AE)" },
382 /* 49*/ { 30, 1, "¡¡", -1, 0, 2, { 0x222E + 0x8080, 0x222E + 0x8080 }, "All EUC-KR in GB 18030 Hanzi mode range" },
383 /* 50*/ { 30, 0, "詰", -1, 0, 2, { 0x7D + 0x80, 0x7E + 0x80 }, "EUC-KR U+8A70 (0xFDFE)" },
384 /* 51*/ { 30, 1, "詰", -1, 0, 1, { 0x7D7E + 0x8080 }, "All EUC-KR in GB 18030 Hanzi mode range" },
385 /* 52*/ { 31, 0, "條", -1, 0, 1, { 0x976C }, "GBK U+689D" },
386 /* 53*/ { 31, 1, "條", -1, 0, 1, { 0x976C }, "GBK U+689D" },
387 /* 54*/ { 31, 0, "條碼", -1, 0, 2, { 0x976C, 0xB461 }, "GBK U+689D" },
388 /* 55*/ { 31, 1, "條碼", -1, 0, 2, { 0x976C, 0xB461 }, "GBK U+689D" },
389 /* 56*/ { 31, 0, "釦", -1, 0, 1, { 0xE240 }, "GB 18030 U+91E6 in GBK" },
390 /* 57*/ { 31, 0, "€", -1, ZINT_ERROR_INVALID_DATA, -1, {0}, "GB 18030 U+20AC not in GBK" },
391 /* 58*/ { 32, 0, "¨¨", -1, 0, 2, { 0xA1A7, 0xA1A7 }, "GB 18030 U+00A8" },
392 /* 59*/ { 32, 1, "¨¨", -1, 0, 2, { 0xA1A7, 0xA1A7 }, "GB 18030" },
393 /* 60*/ { 32, 0, "崂", -1, 0, 1, { 0xE1C0 }, "GB 18030 U+5D02" },
394 /* 61*/ { 32, 1, "崂", -1, 0, 1, { 0xE1C0 }, "GB 18030" },
395 /* 62*/ { 32, 0, "・", -1, 0, 2, { 0x8139, 0xA739 }, "GB 18030 U+30FB" },
396 /* 63*/ { 32, 1, "・", -1, 0, 2, { 0x8139, 0xA739 }, "GB 18030" },
397 /* 64*/ { 32, 0, "€", -1, 0, 1, { 0xA2E3 }, "GB 18030 U+20AC " },
398 };
399 int data_size = ARRAY_SIZE(data);
400 int i, length, ret;
401
402 unsigned int gbdata[30];
403
404 testStart("test_gb18030_utf8_to_eci");
405
406 for (i = 0; i < data_size; i++) {
407 int ret_length;
408
409 if (testContinue(p_ctx, i)) continue;
410
411 length = data[i].length == -1 ? (int) strlen(data[i].data) : data[i].length;
412 ret_length = length;
413
414 ret = gb18030_utf8_to_eci(data[i].eci, (unsigned char *) data[i].data, &ret_length, gbdata, data[i].full_multibyte);
415 assert_equal(ret, data[i].ret, "i:%d ret %d != %d\n", i, ret, data[i].ret);
416 if (ret == 0) {
417 int j;
418 assert_equal(ret_length, data[i].ret_length, "i:%d ret_length %d != %d\n", i, ret_length, data[i].ret_length);
419 for (j = 0; j < (int) ret_length; j++) {
420 assert_equal(gbdata[j], data[i].expected_gbdata[j], "i:%d gbdata[%d] 0x%04X != 0x%04X\n", i, j, gbdata[j], data[i].expected_gbdata[j]);
421 }
422 }
423 }
424
425 testFinish();
426 }
427
428 INTERNAL void gb18030_cpy_test(const unsigned char source[], int *p_length, unsigned int *ddata,
429 const int full_multibyte);
430
431 static void test_gb18030_cpy(const testCtx *const p_ctx) {
432
433 struct item {
434 int full_multibyte;
435 char *data;
436 int length;
437 int ret;
438 int ret_length;
439 unsigned int expected_gbdata[30];
440 char *comment;
441 };
442 /* s/\/\*[ 0-9]*\*\//\=printf("\/\*%3d*\/", line(".") - line("'<")): */
443 struct item data[] = {
444 /* 0*/ { 0, "\351", -1, 0, 1, { 0xE9 }, "Not full multibyte" },
445 /* 1*/ { 1, "\351", -1, 0, 1, { 0xE9 }, "In HANXIN Chinese mode first-byte range but only one byte" },
446 /* 2*/ { 0, "\351\241", -1, 0, 2, { 0xE9, 0xA1 }, "Not full multibyte" },
447 /* 3*/ { 1, "\351\241", -1, 0, 1, { 0xE9A1 }, "In HANXIN Chinese range" },
448 /* 4*/ { 0, "\241", -1, 0, 1, { 0xA1 }, "Not full multibyte" },
449 /* 5*/ { 1, "\241", -1, 0, 1, { 0xA1 }, "In first-byte range but only one byte" },
450 /* 6*/ { 0, "\241\241", -1, 0, 2, { 0xA1, 0xA1 }, "Not full multibyte" },
451 /* 7*/ { 1, "\241\241", -1, 0, 1, { 0xA1A1 }, "In range" },
452 /* 8*/ { 0, "\241\240\241\376\367\376\367\377\2012\2013", -1, 0, 12, { 0xA1, 0xA0, 0xA1, 0xFE, 0xF7, 0xFE, 0xF7, 0xFF, 0x81, 0x32, 0x81, 0x33 }, "" },
453 /* 9*/ { 1, "\241\240\241\376\367\376\367\377\2012\2013", -1, 0, 7, { 0xA1A0, 0xA1FE, 0xF7FE, 0xF7, 0xFF, 0x8132, 0x8133 }, "" },
454 };
455 int data_size = ARRAY_SIZE(data);
456 int i, length;
457
458 unsigned int gbdata[30];
459
460 testStart("test_gb18030_cpy");
461
462 for (i = 0; i < data_size; i++) {
463 int ret_length;
464 int j;
465
466 if (testContinue(p_ctx, i)) continue;
467
468 length = data[i].length == -1 ? (int) strlen(data[i].data) : data[i].length;
469 ret_length = length;
470
471 gb18030_cpy_test((unsigned char *) data[i].data, &ret_length, gbdata, data[i].full_multibyte);
472 assert_equal(ret_length, data[i].ret_length, "i:%d ret_length %d != %d\n", i, ret_length, data[i].ret_length);
473 for (j = 0; j < (int) ret_length; j++) {
474 assert_equal(gbdata[j], data[i].expected_gbdata[j], "i:%d gbdata[%d] %04X != %04X\n", i, j, gbdata[j], data[i].expected_gbdata[j]);
475 }
476 }
477
478 testFinish();
479 }
480
481 INTERNAL int u_gbk_int_test(const unsigned int u, unsigned int *dest);
482
483 /* Control for GBK */
484 static int u_gbk_int2(unsigned int u, unsigned int *dest) {
485 unsigned int c;
486 int tab_length, start_i, end_i;
487 int i;
488
489 tab_length = ARRAY_SIZE(test_gbk_tab);
490 start_i = test_gbk_tab_ind[u >> 10];
491 end_i = start_i + 0x800 > tab_length ? tab_length : start_i + 0x800;
492 for (i = start_i; i < end_i; i += 2) {
493 if (test_gbk_tab[i + 1] == u) {
494 c = test_gbk_tab[i];
495 if (c <= 0xFFFF) {
496 *dest = c;
497 return c <= 0xFF ? 1 : 2;
498 }
499 return 0;
500 }
501 }
502 return 0;
503 }
504
505 static void test_u_gbk_int(const testCtx *const p_ctx) {
506
507 int ret, ret2;
508 unsigned int val, val2;
509 unsigned int i;
510
511 testStart("test_u_gbk_int");
512
513 for (i = 0; i < 0xFFFE; i++) {
514 if (i >= 0xD800 && i <= 0xDFFF) { /* UTF-16 surrogates */
515 continue;
516 }
517 if (testContinue(p_ctx, i)) continue;
518
519 val = val2 = 0;
520 ret = u_gbk_int_test(i, &val);
521 ret2 = u_gbk_int2(i, &val2);
522 assert_equal(ret, ret2, "i:%d 0x%04X ret %d != ret2 %d, val 0x%04X, val2 0x%04X\n", (int) i, i, ret, ret2, val, val2);
523 if (ret2) {
524 assert_equal(val, val2, "i:%d 0x%04X val 0x%04X != val2 0x%04X\n", (int) i, i, val, val2);
525 }
526 }
527
528 testFinish();
529 }
530
531 #define TEST_PERF_ITER_MILLES 100
532 #define TEST_PERF_ITERATIONS (TEST_PERF_ITER_MILLES * 1000)
533
534 /* Not a real test, just performance indicator */
535 static void test_perf(const testCtx *const p_ctx) {
536 int debug = p_ctx->debug;
537
538 struct item {
539 char *data;
540 int ret;
541
542 char *comment;
543 };
544 struct item data[] = {
545 /* 0*/ { "1234567890", 0, "10 numerics" },
546 /* 1*/ { "条码北京條碼པེ་ཅིང།バーコード바코드", 0, "Small various code pages" },
547 /* 2*/ { "Summer Palace Ticket for 6 June 2015 13:00;2015年6月6日夜01時00分PM頤和園のチケット;2015년6월6일13시오후여름궁전티켓.2015年6月6号下午13:00的颐和园门票;", 0, "Small mixed ASCII/Hanzi" },
548 /* 3*/ { "汉信码标准\015\012中国物品编码中心\015\012北京网路畅想科技发展有限公司\015\012张成海、赵楠、黄燕滨、罗秋科、王毅、张铎、王越\015\012施煜、边峥、修兴强\015\012汉信码标准\015\012中国物品编码中心\015\012北京网路畅想科技发展有限公司", 0, "Bigger mixed" },
549 /* 4*/ { "本标准规定了一种矩阵式二维条码——汉信码的码制以及编译码方法。本标准中对汉信码的码图方案、信息编码方法、纠错编译码算法、信息排布方法、参考译码算法等内容进行了详细的描述,汉信码可高效表示《GB 18030—2000 信息技术 信息交换用汉字编码字符集基本集的扩充》中的汉字信息,并具有数据容量大、抗畸变和抗污损能力强、外观美观等特点,适合于在我国各行业的广泛应用。 测试文本,测试人:施煜,边峥,修兴强,袁娲,测试目的:汉字表示,测试版本:40\015\012", 0, "Bigger mixed" },
550 /* 5*/ { "本标准规定了一种矩阵式二维条码——汉信码的码制以及编译码方法。本标准中对汉信码的码图方案、信息编码方法、纠错编译码算法、信息排布方法、参考译码算法等内容进行了详细的描述,汉信码可高效表示《GB 18030—2000 信息技术 信息交换用汉字编码字符集基本集的扩充》中的汉字信息,并具有数据容量大、抗畸变和抗污损能力强、外观美观等特点,适合于在我国各行业的广泛应用。 测试文本,测试人:施煜,边峥,修兴强,袁娲,测试目的:汉字表示,测试版本:40\015\012本标准规定了一种矩阵式二维条码——汉信码的码制以及编译码方法。本标准中对汉信码的码图方案、信息编码方法、纠错编译码算法、信息排布方法、参考译码算法等内容进行了详细的描述,汉信码可高效表示《GB 18030—2000 信息技术 信息交换用汉字编码字符集基本集的扩充》中的汉字信息,并具有数据容量大、抗畸变和抗污损能力强、外观美观等特点,适合于在我国各行业的广泛应用。 测试文本,测试人:施煜,边峥,修兴强,袁娲,测试目的:汉字表示,测试版本:40\015\012本标准规定了一种矩阵式二维条码——汉信码的码制以及编译码方法。本标准中对汉信码的码图方案、信息编码方法、纠错编译码算法RS、信息排布方法、参考译码算法等内容进行了详细的描述,汉信码可高效表示《GB 18030—2000 信息技术 122", 0, "Medium mixed" },
551 /* 6*/ { "本标准规定了一种矩阵式二维条码——汉信码的码制以及编译码方法。本标准中对汉信码的码图方案、信息编码方法、纠错编译码算法、信息排布方法、参考译码算法等内容进行了详细的描述,汉信码可高效表示《GB 18030—2000 信息技术 信息交换用汉字编码字符集基本集的扩充》中的汉字信息,并具有数据容量大、抗畸变和抗污损能力强、外观美观等特点,适合于在我国各行业的广泛应用。 测试文本,测试人:施煜,边峥,修兴强,袁娲,测试目的:汉字表示,测试版本:84\015\012本标准规定了一种矩阵式二维条码——汉信码的码制以及编译码方法。本标准中对汉信码的码图方案、信息编码方法、纠错编译码算法、信息排布方法、参考译码算法等内容进行了详细的描述,汉信码可高效表示《GB 18030—2000 信息技术 信息交换用汉字编码字符集基本集的扩充》中的汉字信息,并具有数据容量大、抗畸变和抗污损能力强、外观美观等特点,适合于在我国各行业的广泛应用。 测试文本,测试人:施煜,边峥,修兴强,袁娲,测试目的:汉字表示,测试版本:84\015\012本标准规定了一种矩阵式二维条码——汉信码的码制以及编译码方法。本标准中对汉信码的码图方案、信息编码方法、纠错编译码算法、信息排布方法、参考译码算法等内容进行了详细的描述,汉信码可高效表示《GB 18030—2000 信息技术 信息交换用汉字编码字符集基本集的扩充》中的汉字信息,并具有数据容量大、抗畸变和抗污损能力强、外观美观等特点,适合于在我国各行业的广泛应用。 测试文本,测试人:施煜,边峥,修兴强,袁娲,测试目的:汉字表示,测试版本:40本标准规定了一种矩阵式二维条码——汉信码的码制以及编译码方法。本标准中对汉信码的码图方", 0, "Bigger mixed" },
552 };
553 int data_size = ARRAY_SIZE(data);
554 int i, length, ret;
555
556 struct zint_symbol symbol = {0};
557 int ret_length;
558 #ifdef TEST_JUST_SAY_GNO
559 int ret_length2;
560 #endif
561 unsigned int ddata[8192];
562 unsigned char dest[8192];
563 int ret2 = 0;
564 #ifdef TEST_JUST_SAY_GNO
565 unsigned int ddata2[8192];
566 #endif
567
568 clock_t start;
569 clock_t total = 0, total_gno = 0, total_eci = 0;
570 clock_t diff, diff_gno, diff_eci;
571 int comment_max = 0;
572
573 if (!(debug & ZINT_DEBUG_TEST_PERFORMANCE)) { /* -d 256 */
574 return;
575 }
576
577 for (i = 0; i < data_size; i++) if ((int) strlen(data[i].comment) > comment_max) comment_max = (int) strlen(data[i].comment);
578
579 printf("Iterations %d\n", TEST_PERF_ITERATIONS);
580
581 for (i = 0; i < data_size; i++) {
582 int j;
583
584 if (testContinue(p_ctx, i)) continue;
585
586 length = (int) strlen(data[i].data);
587
588 diff = diff_gno = diff_eci = 0;
589
590 for (j = 0; j < TEST_PERF_ITERATIONS; j++) {
591 ret_length = length;
592
593 start = clock();
594 ret = gb18030_utf8(&symbol, (unsigned char *) data[i].data, &ret_length, ddata);
595 diff += clock() - start;
596
597 #ifdef TEST_JUST_SAY_GNO
598 ret_length2 = length;
599 start = clock();
600 ret2 = gb18030_utf8_wctomb(&symbol, (unsigned char *) data[i].data, &ret_length2, ddata2);
601 diff_gno += clock() - start;
602 #endif
603 ret_length = length;
604
605 start = clock();
606 (void)utf8_to_eci(32, (unsigned char *) data[i].data, dest, &ret_length);
607 diff_eci += clock() - start;
608 }
609 assert_equal(ret, ret2, "i:%d ret %d != ret2 %d\n", (int) i, ret, ret2);
610
611 printf("%*s: new % 8gms, gno % 8gms ratio % 9g, eci %gms\n", comment_max, data[i].comment,
612 TEST_PERF_TIME(diff), TEST_PERF_TIME(diff_gno), TEST_PERF_RATIO(diff, diff_gno), TEST_PERF_TIME(diff_eci));
613
614 total += diff;
615 total_gno += diff_gno;
616 }
617 if (p_ctx->index == -1) {
618 printf("%*s: new % 8gms, gno % 8gms ratio % 9g, eci %gms\n", comment_max, "totals",
619 TEST_PERF_TIME(total), TEST_PERF_TIME(total_gno), TEST_PERF_RATIO(total, total_gno), TEST_PERF_TIME(total_eci));
620 }
621 }
622
623 int main(int argc, char *argv[]) {
624
625 testFunction funcs[] = { /* name, func */
626 { "test_u_gb18030_int", test_u_gb18030_int },
627 { "test_gb18030_utf8", test_gb18030_utf8 },
628 { "test_gb18030_utf8_to_eci", test_gb18030_utf8_to_eci },
629 { "test_gb18030_cpy", test_gb18030_cpy },
630 { "test_u_gbk_int", test_u_gbk_int },
631 { "test_perf", test_perf },
632 };
633
634 testRun(argc, argv, funcs, ARRAY_SIZE(funcs));
635
636 testReport();
637
638 return 0;
639 }
640
641 /* vim: set ts=4 sw=4 et : */