comparison mupdf-source/thirdparty/leptonica/src/bardecode.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 /*====================================================================*
2 - Copyright (C) 2001 Leptonica. All rights reserved.
3 -
4 - Redistribution and use in source and binary forms, with or without
5 - modification, are permitted provided that the following conditions
6 - are met:
7 - 1. Redistributions of source code must retain the above copyright
8 - notice, this list of conditions and the following disclaimer.
9 - 2. Redistributions in binary form must reproduce the above
10 - copyright notice, this list of conditions and the following
11 - disclaimer in the documentation and/or other materials
12 - provided with the distribution.
13 -
14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *====================================================================*/
26
27 /*!
28 * \file bardecode.c
29 * <pre>
30 *
31 * Dispatcher
32 * char *barcodeDispatchDecoder()
33 *
34 * Format Determination
35 * static l_int32 barcodeFindFormat()
36 * l_int32 barcodeFormatIsSupported()
37 * static l_int32 barcodeVerifyFormat()
38 *
39 * Decode 2 of 5
40 * static char *barcodeDecode2of5()
41 *
42 * Decode Interleaved 2 of 5
43 * static char *barcodeDecodeI2of5()
44 *
45 * Decode Code 93
46 * static char *barcodeDecode93()
47 *
48 * Decode Code 39
49 * static char *barcodeDecode39()
50 *
51 * Decode Codabar
52 * static char *barcodeDecodeCodabar()
53 *
54 * Decode UPC-A
55 * static char *barcodeDecodeUpca()
56 *
57 * Decode EAN 13
58 * static char *barcodeDecodeEan13()
59 * </pre>
60 */
61
62 #ifdef HAVE_CONFIG_H
63 #include <config_auto.h>
64 #endif /* HAVE_CONFIG_H */
65
66 #include <string.h>
67 #include "allheaders.h"
68 #include "readbarcode.h"
69
70 static l_int32 barcodeFindFormat(char *barstr);
71 static l_int32 barcodeVerifyFormat(char *barstr, l_int32 format,
72 l_int32 *pvalid, l_int32 *preverse);
73 static char *barcodeDecode2of5(char *barstr, l_int32 debugflag);
74 static char *barcodeDecodeI2of5(char *barstr, l_int32 debugflag);
75 static char *barcodeDecode93(char *barstr, l_int32 debugflag);
76 static char *barcodeDecode39(char *barstr, l_int32 debugflag);
77 static char *barcodeDecodeCodabar(char *barstr, l_int32 debugflag);
78 static char *barcodeDecodeUpca(char *barstr, l_int32 debugflag);
79 static char *barcodeDecodeEan13(char *barstr, l_int32 first, l_int32 debugflag);
80
81 #ifndef NO_CONSOLE_IO
82 #define DEBUG_CODES 0
83 #endif /* ~NO_CONSOLE_IO */
84
85 /*------------------------------------------------------------------------*
86 * Decoding dispatcher *
87 *------------------------------------------------------------------------*/
88 /*!
89 * \brief barcodeDispatchDecoder()
90 *
91 * \param[in] barstr string of integers in set {1,2,3,4} of bar widths
92 * \param[in] format L_BF_ANY, L_BF_CODEI2OF5, L_BF_CODE93, ...
93 * \param[in] debugflag use 1 to generate debug output
94 * \return data string of decoded barcode data, or NULL on error
95 */
96 char *
97 barcodeDispatchDecoder(char *barstr,
98 l_int32 format,
99 l_int32 debugflag)
100 {
101 char *data = NULL;
102
103 if (!barstr)
104 return (char *)ERROR_PTR("barstr not defined", __func__, NULL);
105
106 debugflag = FALSE; /* not used yet */
107
108 if (format == L_BF_ANY)
109 format = barcodeFindFormat(barstr);
110
111 if (format == L_BF_CODE2OF5)
112 data = barcodeDecode2of5(barstr, debugflag);
113 else if (format == L_BF_CODEI2OF5)
114 data = barcodeDecodeI2of5(barstr, debugflag);
115 else if (format == L_BF_CODE93)
116 data = barcodeDecode93(barstr, debugflag);
117 else if (format == L_BF_CODE39)
118 data = barcodeDecode39(barstr, debugflag);
119 else if (format == L_BF_CODABAR)
120 data = barcodeDecodeCodabar(barstr, debugflag);
121 else if (format == L_BF_UPCA)
122 data = barcodeDecodeUpca(barstr, debugflag);
123 else if (format == L_BF_EAN13)
124 data = barcodeDecodeEan13(barstr, 0, debugflag);
125 else
126 return (char *)ERROR_PTR("format not implemented", __func__, NULL);
127
128 return data;
129 }
130
131
132 /*------------------------------------------------------------------------*
133 * Barcode format determination *
134 *------------------------------------------------------------------------*/
135 /*!
136 * \brief barcodeFindFormat()
137 *
138 * \param[in] barstr of barcode widths, in set {1,2,3,4}
139 * \return format for barcode, or L_BF_UNKNOWN if not recognized
140 */
141 static l_int32
142 barcodeFindFormat(char *barstr)
143 {
144 l_int32 i, format, valid;
145
146 if (!barstr)
147 return ERROR_INT("barstr not defined", __func__, L_BF_UNKNOWN);
148
149 for (i = 0; i < NumSupportedBarcodeFormats; i++) {
150 format = SupportedBarcodeFormat[i];
151 barcodeVerifyFormat(barstr, format, &valid, NULL);
152 if (valid) {
153 L_INFO("Barcode format: %s\n", __func__,
154 SupportedBarcodeFormatName[i]);
155 return format;
156 }
157 }
158 return L_BF_UNKNOWN;
159 }
160
161
162 /*!
163 * \brief barcodeFormatIsSupported()
164 *
165 * \param[in] format
166 * \return 1 if format is one of those supported; 0 otherwise
167 *
168 */
169 l_int32
170 barcodeFormatIsSupported(l_int32 format)
171 {
172 l_int32 i;
173
174 for (i = 0; i < NumSupportedBarcodeFormats; i++) {
175 if (format == SupportedBarcodeFormat[i])
176 return 1;
177 }
178 return 0;
179 }
180
181
182 /*!
183 * \brief barcodeVerifyFormat()
184 *
185 * \param[in] barstr of barcode widths, in set {1,2,3,4}
186 * \param[in] format L_BF_CODEI2OF5, L_BF_CODE93, ...
187 * \param[out] pvalid 0 if not valid, 1 and 2 if valid
188 * \param[out] preverse [optional] 1 if reversed; 0 otherwise
189 * \return 0 if OK, 1 on error
190 *
191 * <pre>
192 * Notes:
193 * (1) If valid == 1, the barcode is of the given format in the
194 * forward order; if valid == 2, it is backwards.
195 * (2) If the barcode needs to be reversed to read it, and &reverse
196 * is provided, a 1 is put into %reverse.
197 * (3) Require at least 12 data bits, in addition to format identifiers.
198 * (TODO) If the barcode has a fixed length, this should be used
199 * explicitly, as is done for L_BF_UPCA and L_BF_EAN13.
200 * (4) (TODO) Add to this as more formats are supported.
201 * </pre>
202 */
203 static l_int32
204 barcodeVerifyFormat(char *barstr,
205 l_int32 format,
206 l_int32 *pvalid,
207 l_int32 *preverse)
208 {
209 char *revbarstr;
210 l_int32 i, start, len, stop, mid;
211
212 if (!pvalid)
213 return ERROR_INT("barstr not defined", __func__, 1);
214 *pvalid = 0;
215 if (preverse) *preverse = 0;
216 if (!barstr)
217 return ERROR_INT("barstr not defined", __func__, 1);
218
219 switch (format)
220 {
221 case L_BF_CODE2OF5:
222 start = !strncmp(barstr, Code2of5[C25_START], 3);
223 len = strlen(barstr);
224 if (len < 20)
225 return ERROR_INT("barstr too short for CODE2OF5", __func__, 1);
226 stop = !strncmp(&barstr[len - 5], Code2of5[C25_STOP], 5);
227 if (start && stop) {
228 *pvalid = 1;
229 } else {
230 revbarstr = stringReverse(barstr);
231 start = !strncmp(revbarstr, Code2of5[C25_START], 3);
232 stop = !strncmp(&revbarstr[len - 5], Code2of5[C25_STOP], 5);
233 LEPT_FREE(revbarstr);
234 if (start && stop) {
235 *pvalid = 1;
236 if (preverse) *preverse = 1;
237 }
238 }
239 break;
240 case L_BF_CODEI2OF5:
241 start = !strncmp(barstr, CodeI2of5[CI25_START], 4);
242 len = strlen(barstr);
243 if (len < 20)
244 return ERROR_INT("barstr too short for CODEI2OF5", __func__, 1);
245 stop = !strncmp(&barstr[len - 3], CodeI2of5[CI25_STOP], 3);
246 if (start && stop) {
247 *pvalid = 1;
248 } else {
249 revbarstr = stringReverse(barstr);
250 start = !strncmp(revbarstr, CodeI2of5[CI25_START], 4);
251 stop = !strncmp(&revbarstr[len - 3], CodeI2of5[CI25_STOP], 3);
252 LEPT_FREE(revbarstr);
253 if (start && stop) {
254 *pvalid = 1;
255 if (preverse) *preverse = 1;
256 }
257 }
258 break;
259 case L_BF_CODE93:
260 start = !strncmp(barstr, Code93[C93_START], 6);
261 len = strlen(barstr);
262 if (len < 28)
263 return ERROR_INT("barstr too short for CODE93", __func__, 1);
264 stop = !strncmp(&barstr[len - 7], Code93[C93_STOP], 6);
265 if (start && stop) {
266 *pvalid = 1;
267 } else {
268 revbarstr = stringReverse(barstr);
269 start = !strncmp(revbarstr, Code93[C93_START], 6);
270 stop = !strncmp(&revbarstr[len - 7], Code93[C93_STOP], 6);
271 LEPT_FREE(revbarstr);
272 if (start && stop) {
273 *pvalid = 1;
274 if (preverse) *preverse = 1;
275 }
276 }
277 break;
278 case L_BF_CODE39:
279 start = !strncmp(barstr, Code39[C39_START], 9);
280 len = strlen(barstr);
281 if (len < 30)
282 return ERROR_INT("barstr too short for CODE39", __func__, 1);
283 stop = !strncmp(&barstr[len - 9], Code39[C39_STOP], 9);
284 if (start && stop) {
285 *pvalid = 1;
286 } else {
287 revbarstr = stringReverse(barstr);
288 start = !strncmp(revbarstr, Code39[C39_START], 9);
289 stop = !strncmp(&revbarstr[len - 9], Code39[C39_STOP], 9);
290 LEPT_FREE(revbarstr);
291 if (start && stop) {
292 *pvalid = 1;
293 if (preverse) *preverse = 1;
294 }
295 }
296 break;
297 case L_BF_CODABAR:
298 start = stop = 0;
299 len = strlen(barstr);
300 if (len < 26)
301 return ERROR_INT("barstr too short for CODABAR", __func__, 1);
302 for (i = 16; i <= 19; i++) /* any of these will do */
303 start += !strncmp(barstr, Codabar[i], 7);
304 for (i = 16; i <= 19; i++) /* ditto */
305 stop += !strncmp(&barstr[len - 7], Codabar[i], 7);
306 if (start && stop) {
307 *pvalid = 1;
308 } else {
309 start = stop = 0;
310 revbarstr = stringReverse(barstr);
311 for (i = 16; i <= 19; i++)
312 start += !strncmp(revbarstr, Codabar[i], 7);
313 for (i = 16; i <= 19; i++)
314 stop += !strncmp(&revbarstr[len - 7], Codabar[i], 7);
315 LEPT_FREE(revbarstr);
316 if (start && stop) {
317 *pvalid = 1;
318 if (preverse) *preverse = 1;
319 }
320 }
321 break;
322 case L_BF_UPCA:
323 case L_BF_EAN13:
324 len = strlen(barstr);
325 if (len != 59)
326 return ERROR_INT("invalid length for UPCA or EAN13", __func__, 1);
327 start = !strncmp(barstr, Upca[UPCA_START], 3);
328 mid = !strncmp(&barstr[27], Upca[UPCA_MID], 5);
329 stop = !strncmp(&barstr[len - 3], Upca[UPCA_STOP], 3);
330 if (start && mid && stop)
331 *pvalid = 1;
332 break;
333 default:
334 return ERROR_INT("format not supported", __func__, 1);
335 }
336
337 return 0;
338 }
339
340
341 /*------------------------------------------------------------------------*
342 * Code 2 of 5 *
343 *------------------------------------------------------------------------*/
344 /*!
345 * \brief barcodeDecode2of5()
346 *
347 * \param[in] barstr of widths, in set {1, 2}
348 * \param[in] debugflag
349 * \return data string of digits, or NULL if none found or on error
350 *
351 * <pre>
352 * Notes:
353 * (1) Ref: http://en.wikipedia.org/wiki/Two-out-of-five_code (Note:
354 * the codes given here are wrong!)
355 * http://morovia.com/education/symbology/code25.asp
356 * (2) This is a very low density encoding for the 10 digits.
357 * Each digit is encoded with 5 black bars, of which 2 are wide
358 * and 3 are narrow. No information is carried in the spaces
359 * between the bars, which are all equal in width, represented by
360 * a "1" in our encoding.
361 * (3) The mapping from the sequence of five bar widths to the
362 * digit is identical to the mapping used by the interleaved
363 * 2 of 5 code. The start code is 21211, representing two
364 * wide bars and a narrow bar, and the interleaved "1" spaces
365 * are explicit. The stop code is 21112. For all codes
366 * (including start and stop), the trailing space "1" is
367 * implicit -- there is no reason to represent it in the
368 * Code2of5[] array.
369 * </pre>
370 */
371 static char *
372 barcodeDecode2of5(char *barstr,
373 l_int32 debugflag)
374 {
375 char *data, *vbarstr;
376 char code[10];
377 l_int32 valid, reverse, i, j, len, error, ndigits, start, found;
378
379 if (!barstr)
380 return (char *)ERROR_PTR("barstr not defined", __func__, NULL);
381
382 /* Verify format; reverse if necessary */
383 barcodeVerifyFormat(barstr, L_BF_CODE2OF5, &valid, &reverse);
384 if (!valid)
385 return (char *)ERROR_PTR("barstr not in 2of5 format", __func__, NULL);
386 if (reverse)
387 vbarstr = stringReverse(barstr);
388 else
389 vbarstr = stringNew(barstr);
390
391 /* Verify size */
392 len = strlen(vbarstr);
393 if ((len - 11) % 10 != 0) {
394 LEPT_FREE(vbarstr);
395 return (char *)ERROR_PTR("size not divisible by 10: invalid 2of5 code",
396 __func__, NULL);
397 }
398
399 error = FALSE;
400 ndigits = (len - 11) / 10;
401 data = (char *)LEPT_CALLOC(ndigits + 1, sizeof(char));
402 memset(code, 0, 10);
403 for (i = 0; i < ndigits; i++) {
404 start = 6 + 10 * i;
405 for (j = 0; j < 9; j++)
406 code[j] = vbarstr[start + j];
407
408 if (debugflag)
409 lept_stderr("code: %s\n", code);
410
411 found = FALSE;
412 for (j = 0; j < 10; j++) {
413 if (!strcmp(code, Code2of5[j])) {
414 data[i] = 0x30 + j;
415 found = TRUE;
416 break;
417 }
418 }
419 if (!found) error = TRUE;
420 }
421 LEPT_FREE(vbarstr);
422
423 if (error) {
424 LEPT_FREE(data);
425 return (char *)ERROR_PTR("error in decoding", __func__, NULL);
426 }
427
428 return data;
429 }
430
431
432 /*------------------------------------------------------------------------*
433 * Interleaved Code 2 of 5 *
434 *------------------------------------------------------------------------*/
435 /*!
436 * \brief barcodeDecodeI2of5()
437 *
438 * \param[in] barstr of widths, in set {1, 2}
439 * \param[in] debugflag
440 * \return data string of digits, or NULL if none found or on error
441 *
442 * <pre>
443 * Notes:
444 * (1) Ref: http://en.wikipedia.org/wiki/Interleaved_2_of_5
445 * (2) This always encodes an even number of digits.
446 * The start code is 1111; the stop code is 211.
447 * </pre>
448 */
449 static char *
450 barcodeDecodeI2of5(char *barstr,
451 l_int32 debugflag)
452 {
453 char *data, *vbarstr;
454 char code1[6], code2[6];
455 l_int32 valid, reverse, i, j, len, error, npairs, start, found;
456
457 if (!barstr)
458 return (char *)ERROR_PTR("barstr not defined", __func__, NULL);
459
460 /* Verify format; reverse if necessary */
461 barcodeVerifyFormat(barstr, L_BF_CODEI2OF5, &valid, &reverse);
462 if (!valid)
463 return (char *)ERROR_PTR("barstr not in i2of5 format", __func__, NULL);
464 if (reverse)
465 vbarstr = stringReverse(barstr);
466 else
467 vbarstr = stringNew(barstr);
468
469 /* Verify size */
470 len = strlen(vbarstr);
471 if ((len - 7) % 10 != 0) {
472 LEPT_FREE(vbarstr);
473 return (char *)ERROR_PTR("size not divisible by 10: invalid I2of5 code",
474 __func__, NULL);
475 }
476
477 error = FALSE;
478 npairs = (len - 7) / 10;
479 data = (char *)LEPT_CALLOC(2 * npairs + 1, sizeof(char));
480 memset(code1, 0, 6);
481 memset(code2, 0, 6);
482 for (i = 0; i < npairs; i++) {
483 start = 4 + 10 * i;
484 for (j = 0; j < 5; j++) {
485 code1[j] = vbarstr[start + 2 * j];
486 code2[j] = vbarstr[start + 2 * j + 1];
487 }
488
489 if (debugflag)
490 lept_stderr("code1: %s, code2: %s\n", code1, code2);
491
492 found = FALSE;
493 for (j = 0; j < 10; j++) {
494 if (!strcmp(code1, CodeI2of5[j])) {
495 data[2 * i] = 0x30 + j;
496 found = TRUE;
497 break;
498 }
499 }
500 if (!found) error = TRUE;
501 found = FALSE;
502 for (j = 0; j < 10; j++) {
503 if (!strcmp(code2, CodeI2of5[j])) {
504 data[2 * i + 1] = 0x30 + j;
505 found = TRUE;
506 break;
507 }
508 }
509 if (!found) error = TRUE;
510 }
511 LEPT_FREE(vbarstr);
512
513 if (error) {
514 LEPT_FREE(data);
515 return (char *)ERROR_PTR("error in decoding", __func__, NULL);
516 }
517
518 return data;
519 }
520
521
522 /*------------------------------------------------------------------------*
523 * Code 93 *
524 *------------------------------------------------------------------------*/
525 /*!
526 * \brief barcodeDecode93()
527 *
528 * \param[in] barstr of widths, in set {1, 2, 3, 4}
529 * \param[in] debugflag
530 * \return data string of digits, or NULL if none found or on error
531 *
532 * <pre>
533 * Notes:
534 * (1) Ref: http://en.wikipedia.org/wiki/Code93
535 * http://morovia.com/education/symbology/code93.asp
536 * (2) Each symbol has 3 black and 3 white bars.
537 * The start and stop codes are 111141; the stop code then is
538 * terminated with a final (1) bar.
539 * (3) The last two codes are check codes. We are checking them
540 * for correctness, and issuing a warning on failure. Should
541 * probably not return any data on failure.
542 * </pre>
543 */
544 static char *
545 barcodeDecode93(char *barstr,
546 l_int32 debugflag)
547 {
548 const char *checkc, *checkk;
549 char *data, *vbarstr;
550 char code[7];
551 l_int32 valid, reverse, i, j, len, error, nsymb, start, found, sum;
552 l_int32 *index;
553
554 if (!barstr)
555 return (char *)ERROR_PTR("barstr not defined", __func__, NULL);
556
557 /* Verify format; reverse if necessary */
558 barcodeVerifyFormat(barstr, L_BF_CODE93, &valid, &reverse);
559 if (!valid)
560 return (char *)ERROR_PTR("barstr not in code93 format", __func__, NULL);
561 if (reverse)
562 vbarstr = stringReverse(barstr);
563 else
564 vbarstr = stringNew(barstr);
565
566 /* Verify size; skip the first 6 and last 7 bars. */
567 len = strlen(vbarstr);
568 if ((len - 13) % 6 != 0) {
569 LEPT_FREE(vbarstr);
570 return (char *)ERROR_PTR("size not divisible by 6: invalid code 93",
571 __func__, NULL);
572 }
573
574 /* Decode the symbols */
575 nsymb = (len - 13) / 6;
576 data = (char *)LEPT_CALLOC(nsymb + 1, sizeof(char));
577 index = (l_int32 *)LEPT_CALLOC(nsymb, sizeof(l_int32));
578 memset(code, 0, 7);
579 error = FALSE;
580 for (i = 0; i < nsymb; i++) {
581 start = 6 + 6 * i;
582 for (j = 0; j < 6; j++)
583 code[j] = vbarstr[start + j];
584
585 if (debugflag)
586 lept_stderr("code: %s\n", code);
587
588 found = FALSE;
589 for (j = 0; j < C93_START; j++) {
590 if (!strcmp(code, Code93[j])) {
591 data[i] = Code93Val[j];
592 index[i] = j;
593 found = TRUE;
594 break;
595 }
596 }
597 if (!found) error = TRUE;
598 }
599 LEPT_FREE(vbarstr);
600
601 if (error) {
602 LEPT_FREE(index);
603 LEPT_FREE(data);
604 return (char *)ERROR_PTR("error in decoding", __func__, NULL);
605 }
606
607 /* Do check sums. For character "C", use only the
608 * actual data in computing the sum. For character "K",
609 * use the actual data plus the check character "C". */
610 sum = 0;
611 for (i = 0; i < nsymb - 2; i++) /* skip the "C" and "K" */
612 sum += ((i % 20) + 1) * index[nsymb - 3 - i];
613 if (data[nsymb - 2] != Code93Val[sum % 47])
614 L_WARNING("Error for check C\n", __func__);
615
616 if (debugflag) {
617 checkc = Code93[sum % 47];
618 lept_stderr("checkc = %s\n", checkc);
619 }
620
621 sum = 0;
622 for (i = 0; i < nsymb - 1; i++) /* skip the "K" */
623 sum += ((i % 15) + 1) * index[nsymb - 2 - i];
624 if (data[nsymb - 1] != Code93Val[sum % 47])
625 L_WARNING("Error for check K\n", __func__);
626
627 if (debugflag) {
628 checkk = Code93[sum % 47];
629 lept_stderr("checkk = %s\n", checkk);
630 }
631
632 /* Remove the two check codes from the output */
633 data[nsymb - 2] = '\0';
634
635 LEPT_FREE(index);
636 return data;
637 }
638
639
640 /*------------------------------------------------------------------------*
641 * Code 39 *
642 *------------------------------------------------------------------------*/
643 /*!
644 * \brief barcodeDecode39()
645 *
646 * \param[in] barstr of widths, in set {1, 2}
647 * \param[in] debugflag
648 * \return data string of digits, or NULL if none found or on error
649 *
650 * <pre>
651 * Notes:
652 * (1) Ref: http://en.wikipedia.org/wiki/Code39
653 * http://morovia.com/education/symbology/code39.asp
654 * (2) Each symbol has 5 black and 4 white bars.
655 * The start and stop codes are 121121211 (the asterisk)
656 * (3) This decoder was contributed by Roger Hyde.
657 * </pre>
658 */
659 static char *
660 barcodeDecode39(char *barstr,
661 l_int32 debugflag)
662 {
663 char *data, *vbarstr;
664 char code[10];
665 l_int32 valid, reverse, i, j, len, error, nsymb, start, found;
666
667 if (!barstr)
668 return (char *)ERROR_PTR("barstr not defined", __func__, NULL);
669
670 /* Verify format; reverse if necessary */
671 barcodeVerifyFormat(barstr, L_BF_CODE39, &valid, &reverse);
672 if (!valid)
673 return (char *)ERROR_PTR("barstr not in code39 format", __func__, NULL);
674 if (reverse)
675 vbarstr = stringReverse(barstr);
676 else
677 vbarstr = stringNew(barstr);
678
679 /* Verify size */
680 len = strlen(vbarstr);
681 if ((len + 1) % 10 != 0) {
682 LEPT_FREE(vbarstr);
683 return (char *)ERROR_PTR("size+1 not divisible by 10: invalid code 39",
684 __func__, NULL);
685 }
686
687 /* Decode the symbols */
688 nsymb = (len - 19) / 10;
689 data = (char *)LEPT_CALLOC(nsymb + 1, sizeof(char));
690 memset(code, 0, 10);
691 error = FALSE;
692 for (i = 0; i < nsymb; i++) {
693 start = 10 + 10 * i;
694 for (j = 0; j < 9; j++)
695 code[j] = vbarstr[start + j];
696
697 if (debugflag)
698 lept_stderr("code: %s\n", code);
699
700 found = FALSE;
701 for (j = 0; j < C39_START; j++) {
702 if (!strcmp(code, Code39[j])) {
703 data[i] = Code39Val[j];
704 found = TRUE;
705 break;
706 }
707 }
708 if (!found) error = TRUE;
709 }
710 LEPT_FREE(vbarstr);
711
712 if (error) {
713 LEPT_FREE(data);
714 return (char *)ERROR_PTR("error in decoding", __func__, NULL);
715 }
716
717 return data;
718 }
719
720
721 /*------------------------------------------------------------------------*
722 * Codabar *
723 *------------------------------------------------------------------------*/
724 /*!
725 * \brief barcodeDecodeCodabar()
726 *
727 * \param[in] barstr of widths, in set {1, 2}
728 * \param[in] debugflag
729 * \return data string of digits, or NULL if none found or on error
730 *
731 * <pre>
732 * Notes:
733 * (1) Ref: http://en.wikipedia.org/wiki/Codabar
734 * http://morovia.com/education/symbology/codabar.asp
735 * (2) Each symbol has 4 black and 3 white bars. They represent the
736 * 10 digits, and optionally 6 other characters. The start and
737 * stop codes can be any of four (typically denoted A,B,C,D).
738 * </pre>
739 */
740 static char *
741 barcodeDecodeCodabar(char *barstr,
742 l_int32 debugflag)
743 {
744 char *data, *vbarstr;
745 char code[8];
746 l_int32 valid, reverse, i, j, len, error, nsymb, start, found;
747
748 if (!barstr)
749 return (char *)ERROR_PTR("barstr not defined", __func__, NULL);
750
751 /* Verify format; reverse if necessary */
752 barcodeVerifyFormat(barstr, L_BF_CODABAR, &valid, &reverse);
753 if (!valid)
754 return (char *)ERROR_PTR("barstr not in codabar format",
755 __func__, NULL);
756 if (reverse)
757 vbarstr = stringReverse(barstr);
758 else
759 vbarstr = stringNew(barstr);
760
761 /* Verify size */
762 len = strlen(vbarstr);
763 if ((len + 1) % 8 != 0) {
764 LEPT_FREE(vbarstr);
765 return (char *)ERROR_PTR("size+1 not divisible by 8: invalid codabar",
766 __func__, NULL);
767 }
768
769 /* Decode the symbols */
770 nsymb = (len - 15) / 8;
771 data = (char *)LEPT_CALLOC(nsymb + 1, sizeof(char));
772 memset(code, 0, 8);
773 error = FALSE;
774 for (i = 0; i < nsymb; i++) {
775 start = 8 + 8 * i;
776 for (j = 0; j < 7; j++)
777 code[j] = vbarstr[start + j];
778
779 if (debugflag)
780 lept_stderr("code: %s\n", code);
781
782 found = FALSE;
783 for (j = 0; j < 16; j++) {
784 if (!strcmp(code, Codabar[j])) {
785 data[i] = CodabarVal[j];
786 found = TRUE;
787 break;
788 }
789 }
790 if (!found) error = TRUE;
791 }
792 LEPT_FREE(vbarstr);
793
794 if (error) {
795 LEPT_FREE(data);
796 return (char *)ERROR_PTR("error in decoding", __func__, NULL);
797 }
798
799 return data;
800 }
801
802
803 /*------------------------------------------------------------------------*
804 * Code UPC-A *
805 *------------------------------------------------------------------------*/
806 /*!
807 * \brief barcodeDecodeUpca()
808 *
809 * \param[in] barstr of widths, in set {1, 2, 3, 4}
810 * \param[in] debugflag
811 * \return data string of digits, or NULL if none found or on error
812 *
813 * <pre>
814 * Notes:
815 * (1) Ref: http://en.wikipedia.org/wiki/UniversalProductCode
816 * http://morovia.com/education/symbology/upc-a.asp
817 * (2) Each symbol has 2 black and 2 white bars, and encodes a digit.
818 * The start and stop codes are 111 and 111. There are a total of
819 * 30 black bars, encoding 12 digits in two sets of 6, with
820 * 2 black bars separating the sets.
821 * (3) The last digit is a check digit. We check for correctness, and
822 * issue a warning on failure. Should probably not return any
823 * data on failure.
824 * </pre>
825 */
826 static char *
827 barcodeDecodeUpca(char *barstr,
828 l_int32 debugflag)
829 {
830 char *data, *vbarstr;
831 char code[5];
832 l_int32 valid, i, j, len, error, start, found, sum, checkdigit;
833
834 if (!barstr)
835 return (char *)ERROR_PTR("barstr not defined", __func__, NULL);
836
837 /* Verify format; reverse has no meaning here -- we must test both */
838 barcodeVerifyFormat(barstr, L_BF_UPCA, &valid, NULL);
839 if (!valid)
840 return (char *)ERROR_PTR("barstr not in UPC-A format", __func__, NULL);
841
842 /* Verify size */
843 len = strlen(barstr);
844 if (len != 59)
845 return (char *)ERROR_PTR("size not 59; invalid UPC-A barcode",
846 __func__, NULL);
847
848 /* Check the first digit. If invalid, reverse the string. */
849 memset(code, 0, 5);
850 for (i = 0; i < 4; i++)
851 code[i] = barstr[i + 3];
852 found = FALSE;
853 for (i = 0; i < 10; i++) {
854 if (!strcmp(code, Upca[i])) {
855 found = TRUE;
856 break;
857 }
858 }
859 if (found == FALSE)
860 vbarstr = stringReverse(barstr);
861 else
862 vbarstr = stringNew(barstr);
863
864 /* Decode the 12 symbols */
865 data = (char *)LEPT_CALLOC(13, sizeof(char));
866 memset(code, 0, 5);
867 error = FALSE;
868 for (i = 0; i < 12; i++) {
869 if (i < 6)
870 start = 3 + 4 * i;
871 else
872 start = 32 + 4 * (i - 6);
873 for (j = 0; j < 4; j++)
874 code[j] = vbarstr[start + j];
875
876 if (debugflag)
877 lept_stderr("code: %s\n", code);
878
879 found = FALSE;
880 for (j = 0; j < 10; j++) {
881 if (!strcmp(code, Upca[j])) {
882 data[i] = 0x30 + j;
883 found = TRUE;
884 break;
885 }
886 }
887 if (!found) error = TRUE;
888 }
889 LEPT_FREE(vbarstr);
890
891 if (error) {
892 LEPT_FREE(data);
893 return (char *)ERROR_PTR("error in decoding", __func__, NULL);
894 }
895
896 /* Calculate the check digit (data[11]). */
897 sum = 0;
898 for (i = 0; i < 12; i += 2) /* "even" digits */
899 sum += 3 * (data[i] - 0x30);
900 for (i = 1; i < 11; i += 2) /* "odd" digits */
901 sum += (data[i] - 0x30);
902 checkdigit = sum % 10;
903 if (checkdigit) /* not 0 */
904 checkdigit = 10 - checkdigit;
905 if (checkdigit + 0x30 != data[11])
906 L_WARNING("Error for UPC-A check character\n", __func__);
907
908 return data;
909 }
910
911
912 /*------------------------------------------------------------------------*
913 * Code EAN-13 *
914 *------------------------------------------------------------------------*/
915 /*!
916 * \brief barcodeDecodeEan13()
917 *
918 * \param[in] barstr of widths, in set {1, 2, 3, 4}
919 * \param[in] first first digit: 0 - 9
920 * \param[in] debugflag
921 * \return data string of digits, or NULL if none found or on error
922 *
923 * <pre>
924 * Notes:
925 * (1) Ref: http://en.wikipedia.org/wiki/UniversalProductCode
926 * http://morovia.com/education/symbology/ean-13.asp
927 * (2) The encoding is essentially the same as UPC-A, except
928 * there are 13 digits in total, of which 12 are encoded
929 * by bars (as with UPC-A) and the 13th is a leading digit
930 * that determines the encoding of the next 6 digits,
931 * selecting each digit from one of two tables.
932 * encoded in the bars (as with UPC-A). If the first digit
933 * is 0, the encoding is identical to UPC-A.
934 * (3) As with UPC-A, the last digit is a check digit.
935 * (4) For now, we assume the first digit is input to this function.
936 * Eventually, we will read it by pattern matching.
937 *
938 * TODO: fix this for multiple tables, depending on the value of %first
939 * </pre>
940 */
941 static char *
942 barcodeDecodeEan13(char *barstr,
943 l_int32 first,
944 l_int32 debugflag)
945 {
946 char *data, *vbarstr;
947 char code[5];
948 l_int32 valid, i, j, len, error, start, found, sum, checkdigit;
949
950 if (!barstr)
951 return (char *)ERROR_PTR("barstr not defined", __func__, NULL);
952
953 /* Verify format. You can't tell the orientation by the start
954 * and stop codes, but you can by the location of the digits.
955 * Use the UPCA verifier for EAN 13 -- it is identical. */
956 barcodeVerifyFormat(barstr, L_BF_UPCA, &valid, NULL);
957 if (!valid)
958 return (char *)ERROR_PTR("barstr not in EAN 13 format", __func__, NULL);
959
960 /* Verify size */
961 len = strlen(barstr);
962 if (len != 59)
963 return (char *)ERROR_PTR("size not 59; invalid EAN 13 barcode",
964 __func__, NULL);
965
966 /* Check the first digit. If invalid, reverse the string. */
967 memset(code, 0, 5);
968 for (i = 0; i < 4; i++)
969 code[i] = barstr[i + 3];
970 found = FALSE;
971 for (i = 0; i < 10; i++) {
972 if (!strcmp(code, Upca[i])) {
973 found = TRUE;
974 break;
975 }
976 }
977 if (found == FALSE)
978 vbarstr = stringReverse(barstr);
979 else
980 vbarstr = stringNew(barstr);
981
982 /* Decode the 12 symbols */
983 data = (char *)LEPT_CALLOC(13, sizeof(char));
984 memset(code, 0, 5);
985 error = FALSE;
986 for (i = 0; i < 12; i++) {
987 if (i < 6)
988 start = 3 + 4 * i;
989 else
990 start = 32 + 4 * (i - 6);
991 for (j = 0; j < 4; j++)
992 code[j] = vbarstr[start + j];
993
994 if (debugflag)
995 lept_stderr("code: %s\n", code);
996
997 found = FALSE;
998 for (j = 0; j < 10; j++) {
999 if (!strcmp(code, Upca[j])) {
1000 data[i] = 0x30 + j;
1001 found = TRUE;
1002 break;
1003 }
1004 }
1005 if (!found) error = TRUE;
1006 }
1007 LEPT_FREE(vbarstr);
1008
1009 if (error) {
1010 LEPT_FREE(data);
1011 return (char *)ERROR_PTR("error in decoding", __func__, NULL);
1012 }
1013
1014 /* Calculate the check digit (data[11]). */
1015 sum = 0;
1016 for (i = 0; i < 12; i += 2) /* "even" digits */
1017 sum += 3 * (data[i] - 0x30);
1018 for (i = 1; i < 12; i += 2) /* "odd" digits */
1019 sum += (data[i] - 0x30);
1020 checkdigit = sum % 10;
1021 if (checkdigit) /* not 0 */
1022 checkdigit = 10 - checkdigit;
1023 if (checkdigit + 0x30 != data[11])
1024 L_WARNING("Error for EAN-13 check character\n", __func__);
1025
1026 return data;
1027 }