comparison mupdf-source/thirdparty/leptonica/src/utils1.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 /*====================================================================*
2 - Copyright (C) 2001 Leptonica. All rights reserved.
3 -
4 - Redistribution and use in source and binary forms, with or without
5 - modification, are permitted provided that the following conditions
6 - are met:
7 - 1. Redistributions of source code must retain the above copyright
8 - notice, this list of conditions and the following disclaimer.
9 - 2. Redistributions in binary form must reproduce the above
10 - copyright notice, this list of conditions and the following
11 - disclaimer in the documentation and/or other materials
12 - provided with the distribution.
13 -
14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *====================================================================*/
26
27 /*!
28 * \file utils1.c
29 * <pre>
30 *
31 * ------------------------------------------
32 * This file has these utilities:
33 * - error, warning and info messages
34 * - redirection of stderr
35 * - low-level endian conversions
36 * - file corruption operations
37 * - random and prime number operations
38 * - 64-bit hash functions
39 * - leptonica version number accessor
40 * - timing and date operations
41 * ------------------------------------------
42 *
43 * Control of error, warning and info messages
44 * l_int32 setMsgSeverity()
45 *
46 * Error return functions, invoked by macros
47 * l_int32 returnErrorInt()
48 * l_float32 returnErrorFloat()
49 * void *returnErrorPtr()
50 * l_int32 returnErrorInt1()
51 * l_float32 returnErrorFloat1()
52 * void *returnErrorPtr1()
53 *
54 * Runtime redirection of stderr
55 * void leptSetStderrHandler()
56 * void lept_stderr()
57 *
58 * Test files for equivalence
59 * l_int32 filesAreIdentical()
60 *
61 * Byte-swapping data conversion
62 * l_uint16 convertOnBigEnd16()
63 * l_uint32 convertOnBigEnd32()
64 * l_uint16 convertOnLittleEnd16()
65 * l_uint32 convertOnLittleEnd32()
66 *
67 * File corruption and byte replacement operations
68 * l_int32 fileCorruptByDeletion()
69 * l_int32 fileCorruptByMutation()
70 * l_int32 fileReplaceBytes()
71 *
72 * Generate random integer in given interval
73 * l_int32 genRandomIntOnInterval()
74 *
75 * Simple math functions
76 * l_int32 lept_roundftoi()
77 * l_int32 lept_floor()
78 * l_int32 lept_ceiling()
79 *
80 * 64-bit hash functions
81 * l_int32 l_hashStringToUint64()
82 * l_int32 l_hashStringToUint64Fast()
83 * l_int32 l_hashPtToUint64()
84 * l_int32 l_hashFloat64ToUint64()
85 *
86 * Prime finders
87 * l_int32 findNextLargerPrime()
88 * l_int32 lept_isPrime()
89 *
90 * Gray code conversion
91 * l_uint32 convertIntToGrayCode()
92 * l_uint32 convertGrayCodeToInt()
93 *
94 * Leptonica version number
95 * char *getLeptonicaVersion()
96 *
97 * Timing
98 * void startTimer()
99 * l_float32 stopTimer()
100 * L_TIMER startTimerNested()
101 * l_float32 stopTimerNested()
102 * void l_getCurrentTime()
103 * L_WALLTIMER *startWallTimer()
104 * l_float32 stopWallTimer()
105 * void l_getFormattedDate()
106 *
107 * For all issues with cross-platform development, see utils2.c.
108 * </pre>
109 */
110
111 #ifdef HAVE_CONFIG_H
112 #include <config_auto.h>
113 #endif /* HAVE_CONFIG_H */
114
115 #ifdef _WIN32
116 #include <windows.h>
117 #endif /* _WIN32 */
118
119 #include <time.h>
120 #include "allheaders.h"
121 #include <math.h>
122
123 /* Global for controlling message output at runtime */
124 LEPT_DLL l_int32 LeptMsgSeverity = DEFAULT_SEVERITY;
125
126 #define DEBUG_SEV 0
127
128 /*----------------------------------------------------------------------*
129 * Control of error, warning and info messages *
130 *----------------------------------------------------------------------*/
131 /*!
132 * \brief setMsgSeverity()
133 *
134 * \param[in] newsev
135 * \return oldsev
136 *
137 * <pre>
138 * Notes:
139 * (1) setMsgSeverity() allows the user to specify the desired
140 * message severity threshold. Messages of equal or greater
141 * severity will be output. The previous message severity is
142 * returned when the new severity is set.
143 * (2) If L_SEVERITY_EXTERNAL is passed, then the severity will be
144 * obtained from the LEPT_MSG_SEVERITY environment variable.
145 * </pre>
146 */
147 l_int32
148 setMsgSeverity(l_int32 newsev)
149 {
150 l_int32 oldsev;
151 char *envsev;
152
153 oldsev = LeptMsgSeverity;
154 if (newsev == L_SEVERITY_EXTERNAL) {
155 envsev = getenv("LEPT_MSG_SEVERITY");
156 if (envsev) {
157 LeptMsgSeverity = atoi(envsev);
158 #if DEBUG_SEV
159 L_INFO("message severity set to external\n", "setMsgSeverity");
160 #endif /* DEBUG_SEV */
161 } else {
162 #if DEBUG_SEV
163 L_WARNING("environment var LEPT_MSG_SEVERITY not defined\n",
164 "setMsgSeverity");
165 #endif /* DEBUG_SEV */
166 }
167 } else {
168 LeptMsgSeverity = newsev;
169 #if DEBUG_SEV
170 L_INFO("message severity set to %d\n", "setMsgSeverity", newsev);
171 #endif /* DEBUG_SEV */
172 }
173
174 return oldsev;
175 }
176
177
178 /*----------------------------------------------------------------------*
179 * Error return functions, invoked by macros *
180 *----------------------------------------------------------------------*
181 * *
182 * (1) These error functions print messages to stderr and allow *
183 * exit from the function that called them. *
184 * (2) They must be invoked only by the three argument macros *
185 * ERROR_INT, ERROR_FLOAT, ERROR_PTR *
186 * or the four argument macros *
187 * ERROR_INT_1, ERROR_FLOAT_1, ERROR_PTR_1 *
188 * which are in environ.h. *
189 * (3) The print output can be disabled at compile time, either *
190 * by using -DNO_CONSOLE_IO or by setting LeptMsgSeverity. *
191 *----------------------------------------------------------------------*/
192 /*!
193 * \brief returnErrorInt()
194 *
195 * \param[in] msg error message
196 * \param[in] procname use __func__
197 * \param[in] ival return error val (typically 1 for an error)
198 * \return ival
199 */
200 l_int32
201 returnErrorInt(const char *msg,
202 const char *procname,
203 l_int32 ival)
204 {
205 lept_stderr("Error in %s: %s\n", procname, msg);
206 return ival;
207 }
208
209
210 /*!
211 * \brief returnErrorFloat()
212 *
213 * \param[in] msg error message
214 * \param[in] procname use __func__
215 * \param[in] fval return float error val
216 * \return fval
217 */
218 l_float32
219 returnErrorFloat(const char *msg,
220 const char *procname,
221 l_float32 fval)
222 {
223 lept_stderr("Error in %s: %s\n", procname, msg);
224 return fval;
225 }
226
227
228 /*!
229 * \brief returnErrorPtr()
230 *
231 * \param[in] msg error message
232 * \param[in] procname use __func__
233 * \param[in] pval return error val (typically null for an error)
234 * \return pval
235 */
236 void *
237 returnErrorPtr(const char *msg,
238 const char *procname,
239 void *pval)
240 {
241 lept_stderr("Error in %s: %s\n", procname, msg);
242 return pval;
243 }
244
245
246 /*!
247 * \brief returnErrorInt1()
248 *
249 * \param[in] msg error message
250 * \param[in] arg additional error message argument
251 * (will be appended to the error message)
252 * \param[in] procname use __func__
253 * \param[in] ival return error val; typically 1 for an error return
254 * \return ival typically 1 for an error return
255 */
256 l_int32
257 returnErrorInt1(const char *msg,
258 const char *arg,
259 const char *procname,
260 l_int32 ival)
261 {
262 lept_stderr("Leptonica Error in %s: %s: %s\n", procname, msg, arg);
263 return ival;
264 }
265
266
267 /*!
268 * \brief returnErrorFloat1()
269 *
270 * \param[in] msg error message
271 * \param[in] arg additional error message argument
272 * (will be appended to the error message)
273 * \param[in] procname use __func__
274 * \param[in] fval return float error val
275 * \return fval
276 */
277 l_float32
278 returnErrorFloat1(const char *msg,
279 const char *arg,
280 const char *procname,
281 l_float32 fval)
282 {
283 lept_stderr("Leptonica Error in %s: %s: %s\n", procname, msg, arg);
284 return fval;
285 }
286
287
288 /*!
289 * \brief returnErrorPtr1()
290 *
291 * \param[in] msg error message
292 * \param[in] arg additional error message argument
293 * (will be appended to the error message)
294 * \param[in] procname use __func__
295 * \param[in] pval return error val (typically null for an error)
296 * \return pval
297 */
298 void *
299 returnErrorPtr1(const char *msg,
300 const char *arg,
301 const char *procname,
302 void *pval)
303 {
304 lept_stderr("Leptonica Error in %s: %s: %s\n", procname, msg, arg);
305 return pval;
306 }
307
308
309 /*------------------------------------------------------------------------*
310 * Runtime redirection of stderr *
311 *------------------------------------------------------------------------*
312 * *
313 * The user can provide a callback function to redirect messages *
314 * that would otherwise go to stderr. Here are two examples: *
315 * (1) to stop all messages: *
316 * void send_to_devnull(const char *msg) {} *
317 * (2) to write to the system logger: *
318 * void send_to_syslog(const char *msg) { *
319 * syslog(1, msg); *
320 * } *
321 * These would then be registered using *
322 * leptSetStderrHandler(send_to_devnull); *
323 * and *
324 * leptSetStderrHandler(send_to_syslog); *
325 *------------------------------------------------------------------------*/
326 /* By default, all messages go to stderr */
327 static void lept_default_stderr_handler(const char *formatted_msg)
328 {
329 if (formatted_msg)
330 fputs(formatted_msg, stderr);
331 }
332
333 /* The stderr callback handler is private to leptonica.
334 * By default it writes to stderr. */
335 void (*stderr_handler)(const char *) = lept_default_stderr_handler;
336
337
338 /*!
339 * \brief leptSetStderrHandler()
340 *
341 * \param[in] handler callback function for lept_stderr output
342 * \return void
343 *
344 * <pre>
345 * Notes:
346 * (1) This registers a handler for redirection of output to stderr
347 * at runtime.
348 * (2) If called with NULL, the output goes to stderr.
349 * </pre>
350 */
351 void leptSetStderrHandler(void (*handler)(const char *))
352 {
353 if (handler)
354 stderr_handler = handler;
355 else
356 stderr_handler = lept_default_stderr_handler;
357 }
358
359
360 #define MAX_DEBUG_MESSAGE 2000
361 /*!
362 * \brief lept_stderr()
363 *
364 * \param[in] fmt format string
365 * \param[in] ... varargs
366 * \return void
367 *
368 * <pre>
369 * Notes:
370 * (1) This is a replacement for fprintf(), to allow redirection
371 * of output. All calls to fprintf(stderr, ...) are replaced
372 * with calls to lept_stderr(...).
373 * (2) The message size is limited to 2K bytes.
374 (3) This utility was provided by jbarlow83.
375 * </pre>
376 */
377 void lept_stderr(const char *fmt, ...)
378 {
379 va_list args;
380 char msg[MAX_DEBUG_MESSAGE];
381 l_int32 n;
382
383 va_start(args, fmt);
384 n = vsnprintf(msg, sizeof(msg), fmt, args);
385 va_end(args);
386 if (n < 0)
387 return;
388 (*stderr_handler)(msg);
389 }
390
391
392 /*--------------------------------------------------------------------*
393 * Test files for equivalence *
394 *--------------------------------------------------------------------*/
395 /*!
396 * \brief filesAreIdentical()
397 *
398 * \param[in] fname1
399 * \param[in] fname2
400 * \param[out] psame 1 if identical; 0 if different
401 * \return 0 if OK, 1 on error
402 */
403 l_ok
404 filesAreIdentical(const char *fname1,
405 const char *fname2,
406 l_int32 *psame)
407 {
408 l_int32 i, same;
409 size_t nbytes1, nbytes2;
410 l_uint8 *array1, *array2;
411
412 if (!psame)
413 return ERROR_INT("&same not defined", __func__, 1);
414 *psame = 0;
415 if (!fname1 || !fname2)
416 return ERROR_INT("both names not defined", __func__, 1);
417
418 nbytes1 = nbytesInFile(fname1);
419 nbytes2 = nbytesInFile(fname2);
420 if (nbytes1 != nbytes2)
421 return 0;
422
423 if ((array1 = l_binaryRead(fname1, &nbytes1)) == NULL)
424 return ERROR_INT("array1 not read", __func__, 1);
425 if ((array2 = l_binaryRead(fname2, &nbytes2)) == NULL) {
426 LEPT_FREE(array1);
427 return ERROR_INT("array2 not read", __func__, 1);
428 }
429 same = 1;
430 for (i = 0; i < nbytes1; i++) {
431 if (array1[i] != array2[i]) {
432 same = 0;
433 break;
434 }
435 }
436 LEPT_FREE(array1);
437 LEPT_FREE(array2);
438 *psame = same;
439
440 return 0;
441 }
442
443
444 /*--------------------------------------------------------------------------*
445 * 16 and 32 bit byte-swapping on big endian and little endian machines *
446 *--------------------------------------------------------------------------*
447 * *
448 * These are typically used for I/O conversions: *
449 * (1) endian conversion for data that was read from a file *
450 * (2) endian conversion on data before it is written to a file *
451 *--------------------------------------------------------------------------*/
452
453 /*--------------------------------------------------------------------*
454 * 16-bit byte swapping *
455 *--------------------------------------------------------------------*/
456 #ifdef L_BIG_ENDIAN
457
458 l_uint16
459 convertOnBigEnd16(l_uint16 shortin)
460 {
461 return ((shortin << 8) | (shortin >> 8));
462 }
463
464 l_uint16
465 convertOnLittleEnd16(l_uint16 shortin)
466 {
467 return shortin;
468 }
469
470 #else /* L_LITTLE_ENDIAN */
471
472 l_uint16
473 convertOnLittleEnd16(l_uint16 shortin)
474 {
475 return ((shortin << 8) | (shortin >> 8));
476 }
477
478 l_uint16
479 convertOnBigEnd16(l_uint16 shortin)
480 {
481 return shortin;
482 }
483
484 #endif /* L_BIG_ENDIAN */
485
486
487 /*--------------------------------------------------------------------*
488 * 32-bit byte swapping *
489 *--------------------------------------------------------------------*/
490 #ifdef L_BIG_ENDIAN
491
492 l_uint32
493 convertOnBigEnd32(l_uint32 wordin)
494 {
495 return ((wordin << 24) | ((wordin << 8) & 0x00ff0000) |
496 ((wordin >> 8) & 0x0000ff00) | (wordin >> 24));
497 }
498
499 l_uint32
500 convertOnLittleEnd32(l_uint32 wordin)
501 {
502 return wordin;
503 }
504
505 #else /* L_LITTLE_ENDIAN */
506
507 l_uint32
508 convertOnLittleEnd32(l_uint32 wordin)
509 {
510 return ((wordin << 24) | ((wordin << 8) & 0x00ff0000) |
511 ((wordin >> 8) & 0x0000ff00) | (wordin >> 24));
512 }
513
514 l_uint32
515 convertOnBigEnd32(l_uint32 wordin)
516 {
517 return wordin;
518 }
519
520 #endif /* L_BIG_ENDIAN */
521
522
523 /*---------------------------------------------------------------------*
524 * File corruption and byte replacement operations *
525 *---------------------------------------------------------------------*/
526 /*!
527 * \brief fileCorruptByDeletion()
528 *
529 * \param[in] filein
530 * \param[in] loc fractional location of start of deletion
531 * \param[in] size fractional size of deletion
532 * \param[in] fileout corrupted file
533 * \return 0 if OK, 1 on error
534 *
535 * <pre>
536 * Notes:
537 * (1) %loc and %size are expressed as a fraction of the file size.
538 * (2) This makes a copy of the data in %filein, where bytes in the
539 * specified region have deleted.
540 * (3) If (%loc + %size) >= 1.0, this deletes from the position
541 * represented by %loc to the end of the file.
542 * (4) It is useful for testing robustness of I/O wrappers when the
543 * data is corrupted, by simulating data corruption by deletion.
544 * </pre>
545 */
546 l_ok
547 fileCorruptByDeletion(const char *filein,
548 l_float32 loc,
549 l_float32 size,
550 const char *fileout)
551 {
552 l_int32 i, locb, sizeb, rembytes;
553 size_t inbytes, outbytes;
554 l_uint8 *datain, *dataout;
555
556 if (!filein || !fileout)
557 return ERROR_INT("filein and fileout not both specified", __func__, 1);
558 if (loc < 0.0 || loc >= 1.0)
559 return ERROR_INT("loc must be in [0.0 ... 1.0)", __func__, 1);
560 if (size <= 0.0)
561 return ERROR_INT("size must be > 0.0", __func__, 1);
562 if (loc + size > 1.0)
563 size = 1.0f - loc;
564
565 datain = l_binaryRead(filein, &inbytes);
566 locb = (l_int32)(loc * inbytes + 0.5);
567 locb = L_MIN(locb, inbytes - 1);
568 sizeb = (l_int32)(size * inbytes + 0.5);
569 sizeb = L_MAX(1, sizeb);
570 sizeb = L_MIN(sizeb, inbytes - locb); /* >= 1 */
571 L_INFO("Removed %d bytes at location %d\n", __func__, sizeb, locb);
572 rembytes = inbytes - locb - sizeb; /* >= 0; to be copied, after excision */
573
574 outbytes = inbytes - sizeb;
575 dataout = (l_uint8 *)LEPT_CALLOC(outbytes, 1);
576 for (i = 0; i < locb; i++)
577 dataout[i] = datain[i];
578 for (i = 0; i < rembytes; i++)
579 dataout[locb + i] = datain[locb + sizeb + i];
580 l_binaryWrite(fileout, "w", dataout, outbytes);
581
582 LEPT_FREE(datain);
583 LEPT_FREE(dataout);
584 return 0;
585 }
586
587
588 /*!
589 * \brief fileCorruptByMutation()
590 *
591 * \param[in] filein
592 * \param[in] loc fractional location of start of randomization
593 * \param[in] size fractional size of randomization
594 * \param[in] fileout corrupted file
595 * \return 0 if OK, 1 on error
596 *
597 * <pre>
598 * Notes:
599 * (1) %loc and %size are expressed as a fraction of the file size.
600 * (2) This makes a copy of the data in %filein, where bytes in the
601 * specified region have been replaced by random data.
602 * (3) If (%loc + %size) >= 1.0, this modifies data from the position
603 * represented by %loc to the end of the file.
604 * (4) It is useful for testing robustness of I/O wrappers when the
605 * data is corrupted, by simulating data corruption.
606 * </pre>
607 */
608 l_ok
609 fileCorruptByMutation(const char *filein,
610 l_float32 loc,
611 l_float32 size,
612 const char *fileout)
613 {
614 l_int32 i, locb, sizeb;
615 size_t bytes;
616 l_uint8 *data;
617
618 if (!filein || !fileout)
619 return ERROR_INT("filein and fileout not both specified", __func__, 1);
620 if (loc < 0.0 || loc >= 1.0)
621 return ERROR_INT("loc must be in [0.0 ... 1.0)", __func__, 1);
622 if (size <= 0.0)
623 return ERROR_INT("size must be > 0.0", __func__, 1);
624 if (loc + size > 1.0)
625 size = 1.0f - loc;
626
627 data = l_binaryRead(filein, &bytes);
628 locb = (l_int32)(loc * bytes + 0.5);
629 locb = L_MIN(locb, bytes - 1);
630 sizeb = (l_int32)(size * bytes + 0.5);
631 sizeb = L_MAX(1, sizeb);
632 sizeb = L_MIN(sizeb, bytes - locb); /* >= 1 */
633 L_INFO("Randomizing %d bytes at location %d\n", __func__, sizeb, locb);
634
635 /* Make an array of random bytes and do the substitution */
636 for (i = 0; i < sizeb; i++) {
637 data[locb + i] =
638 (l_uint8)(255.9 * ((l_float64)rand() / (l_float64)RAND_MAX));
639 }
640
641 l_binaryWrite(fileout, "w", data, bytes);
642 LEPT_FREE(data);
643 return 0;
644 }
645
646
647 /*!
648 * \brief fileReplaceBytes()
649 *
650 * \param[in] filein input file
651 * \param[in] start start location for replacement
652 * \param[in] nbytes number of bytes to be removed
653 * \param[in] newdata replacement bytes
654 * \param[in] newsize size of replacement bytes
655 * \param[in] fileout output file
656 * \return 0 if OK, 1 on error
657 *
658 * <pre>
659 * Notes:
660 * (1) To remove %nbytes without replacement, set %newdata == NULL.
661 * (2) One use is for replacing the date/time in a pdf file by a
662 * string of 12 '0's, effectively removing the date without
663 * invalidating the byte counters in the pdf file:
664 * fileReplaceBytes(filein 86 12 (char *)"000000000000" 12 fileout
665 * </pre>
666 */
667 l_ok
668 fileReplaceBytes(const char *filein,
669 l_int32 start,
670 l_int32 nbytes,
671 l_uint8 *newdata,
672 size_t newsize,
673 const char *fileout)
674 {
675 l_int32 i, index;
676 size_t inbytes, outbytes;
677 l_uint8 *datain, *dataout;
678
679 if (!filein || !fileout)
680 return ERROR_INT("filein and fileout not both specified", __func__, 1);
681
682 datain = l_binaryRead(filein, &inbytes);
683 if (start + nbytes > inbytes)
684 L_WARNING("start + nbytes > length(filein) = %zu\n", __func__, inbytes);
685
686 if (!newdata) newsize = 0;
687 outbytes = inbytes - nbytes + newsize;
688 if ((dataout = (l_uint8 *)LEPT_CALLOC(outbytes, 1)) == NULL) {
689 LEPT_FREE(datain);
690 return ERROR_INT("calloc fail for dataout", __func__, 1);
691 }
692
693 for (i = 0; i < start; i++)
694 dataout[i] = datain[i];
695 for (i = start; i < start + newsize; i++)
696 dataout[i] = newdata[i - start];
697 index = start + nbytes; /* for datain */
698 start += newsize; /* for dataout */
699 for (i = start; i < outbytes; i++, index++)
700 dataout[i] = datain[index];
701 l_binaryWrite(fileout, "w", dataout, outbytes);
702
703 LEPT_FREE(datain);
704 LEPT_FREE(dataout);
705 return 0;
706 }
707
708
709 /*---------------------------------------------------------------------*
710 * Generate random integer in given interval *
711 *---------------------------------------------------------------------*/
712 /*!
713 * \brief genRandomIntOnInterval()
714 *
715 * \param[in] start beginning of interval; can be < 0
716 * \param[in] end end of interval; must be >= start
717 * \param[in] seed use 0 to skip; otherwise call srand
718 * \param[out] pval random integer in interval [start ... end]
719 * \return 0 if OK, 1 on error
720 */
721 l_ok
722 genRandomIntOnInterval(l_int32 start,
723 l_int32 end,
724 l_int32 seed,
725 l_int32 *pval)
726 {
727 l_float64 range;
728
729 if (!pval)
730 return ERROR_INT("&val not defined", __func__, 1);
731 *pval = 0;
732 if (end < start)
733 return ERROR_INT("invalid range", __func__, 1);
734
735 if (seed > 0) srand(seed);
736 range = (l_float64)(end - start + 1);
737 *pval = start + (l_int32)((l_float64)range *
738 ((l_float64)rand() / (l_float64)RAND_MAX));
739 return 0;
740 }
741
742
743 /*---------------------------------------------------------------------*
744 * Simple math functions *
745 *---------------------------------------------------------------------*/
746 /*!
747 * \brief lept_roundftoi()
748 *
749 * \param[in] fval
750 * \return value rounded to the nearest integer
751 *
752 * <pre>
753 * Notes:
754 * (1) For fval >= 0, fval --> round(fval) == floor(fval + 0.5)
755 * For fval < 0, fval --> -round(-fval))
756 * This is symmetric around 0.
757 * e.g., for fval in (-0.5 ... 0.5), fval --> 0
758 * </pre>
759 */
760 l_int32
761 lept_roundftoi(l_float32 fval)
762 {
763 return (fval >= 0.0) ? (l_int32)(fval + 0.5) : (l_int32)(fval - 0.5);
764 }
765
766
767 /*!
768 * \brief lept_floor()
769 *
770 * \param[in] fval
771 * \return largest integer that is not greater than %fval
772 */
773 l_int32
774 lept_floor(l_float32 fval)
775 {
776 return (fval >= 0.0) ? (l_int32)(fval) : -(l_int32)(-fval);
777 }
778
779
780 /*!
781 * \brief lept_ceiling()
782 *
783 * \param[in] fval
784 * \return smallest integer that is not less than %fval
785 *
786 * <pre>
787 * Notes:
788 * (1) If fval is equal to its interger value, return that.
789 * Otherwise:
790 * For fval > 0, fval --> 1 + floor(fval)
791 * For fval < 0, fval --> -(1 + floor(-fval))
792 * </pre>
793 */
794 l_int32
795 lept_ceiling(l_float32 fval)
796 {
797 return (fval == (l_int32)fval ? (l_int32)fval :
798 fval > 0.0 ? 1 + (l_int32)(fval) : -(1 + (l_int32)(-fval)));
799 }
800
801
802 /*---------------------------------------------------------------------*
803 * 64-bit hash functions *
804 *---------------------------------------------------------------------*/
805 /*!
806 * \brief l_hashStringToUint64()
807 *
808 * \param[in] str
809 * \param[out] phash hash value
810 * \return 0 if OK, 1 on error
811 *
812 * <pre>
813 * Notes:
814 * (1) The intent of the hash is to avoid collisions by mapping
815 * the string as randomly as possible into 64 bits.
816 * (2) To the extent that the hashes are random, the probability of
817 * a collision can be approximated by the square of the number
818 * of strings divided by 2^64. For 1 million strings, the
819 * collision probability is about 1 in 16 million.
820 * (3) I expect non-randomness of the distribution to be most evident
821 * for small text strings. This hash function has been tested
822 * for all 5-character text strings composed of 26 letters,
823 * of which there are 26^5 = 12356630. There are no hash
824 * collisions for this set.
825 * </pre>
826 */
827 l_ok
828 l_hashStringToUint64(const char *str,
829 l_uint64 *phash)
830 {
831 l_uint64 hash, mulp;
832
833 if (phash) *phash = 0;
834 if (!str || (str[0] == '\0'))
835 return ERROR_INT("str not defined or empty", __func__, 1);
836 if (!phash)
837 return ERROR_INT("&hash not defined", __func__, 1);
838
839 mulp = 26544357894361247; /* prime, about 1/700 of the max uint64 */
840 hash = 104395301;
841 while (*str) {
842 hash += (*str++ * mulp) ^ (hash >> 7); /* shift [1...23] are ok */
843 }
844 *phash = hash ^ (hash << 37);
845 return 0;
846 }
847
848
849 /*!
850 * \brief l_hashStringToUint64Fast()
851 *
852 * \param[in] str
853 * \param[out] phash hash value
854 * \return 0 if OK, 1 on error
855 *
856 * <pre>
857 * Notes:
858 * (1) This very simple hash algorithm is described in "The Practice
859 * of Programming" by Kernighan and Pike, p. 57 (1999).
860 * (2) The returned hash value would then be hashed into an index into
861 * the hashtable, using the mod operator with the hashtable size.
862 * </pre>
863 */
864 l_ok
865 l_hashStringToUint64Fast(const char *str,
866 l_uint64 *phash)
867 {
868 l_uint64 h;
869 l_uint8 *p;
870
871 if (phash) *phash = 0;
872 if (!str || (str[0] == '\0'))
873 return ERROR_INT("str not defined or empty", __func__, 1);
874 if (!phash)
875 return ERROR_INT("&hash not defined", __func__, 1);
876
877 h = 0;
878 for (p = (l_uint8 *)str; *p != '\0'; p++)
879 h = 37 * h + *p; /* 37 is good prime number for this */
880 *phash = h;
881 return 0;
882 }
883
884
885 /*!
886 * \brief l_hashPtToUint64()
887 *
888 * \param[in] x, y
889 * \param[out] phash hash value
890 * \return 0 if OK, 1 on error
891 *
892 * <pre>
893 * Notes:
894 * (1) This simple hash function has no collisions for
895 * any of 400 million points with x and y up to 20000.
896 * </pre>
897 */
898 l_ok
899 l_hashPtToUint64(l_int32 x,
900 l_int32 y,
901 l_uint64 *phash)
902 {
903 if (!phash)
904 return ERROR_INT("&hash not defined", __func__, 1);
905
906 *phash = (l_uint64)(2173249142.3849 * x + 3763193258.6227 * y);
907 return 0;
908 }
909
910
911 /*!
912 * \brief l_hashFloat64ToUint64()
913 *
914 * \param[in] val
915 * \param[out] phash hash key
916 * \return 0 if OK, 1 on error
917 *
918 * <pre>
919 * Notes:
920 * (1) This is a simple hash for using hashmaps with 64-bit float data.
921 * (2) The resulting hash is called a "key" in a lookup operation.
922 * The bucket for %val in a hashmap is then found by taking the mod
923 * of the hash key with the number of buckets (which is prime).
924 * </pre>
925 */
926 l_ok
927 l_hashFloat64ToUint64(l_float64 val,
928 l_uint64 *phash)
929 {
930 if (!phash)
931 return ERROR_INT("&hash not defined", __func__, 1);
932 val = (val >= 0.0) ? 847019.66701 * val : -217324.91613 * val;
933 *phash = (l_uint64)val;
934 return 0;
935 }
936
937
938 /*---------------------------------------------------------------------*
939 * Prime finders *
940 *---------------------------------------------------------------------*/
941 /*!
942 * \brief findNextLargerPrime()
943 *
944 * \param[in] start
945 * \param[out] pprime first prime larger than %start
946 * \return 0 if OK, 1 on error
947 */
948 l_ok
949 findNextLargerPrime(l_int32 start,
950 l_uint32 *pprime)
951 {
952 l_int32 i, is_prime;
953
954 if (!pprime)
955 return ERROR_INT("&prime not defined", __func__, 1);
956 *pprime = 0;
957 if (start <= 0)
958 return ERROR_INT("start must be > 0", __func__, 1);
959
960 for (i = start + 1; ; i++) {
961 lept_isPrime(i, &is_prime, NULL);
962 if (is_prime) {
963 *pprime = i;
964 return 0;
965 }
966 }
967
968 return ERROR_INT("prime not found!", __func__, 1);
969 }
970
971
972 /*!
973 * \brief lept_isPrime()
974 *
975 * \param[in] n 64-bit unsigned
976 * \param[out] pis_prime 1 if prime, 0 otherwise
977 * \param[out] pfactor [optional] smallest divisor, or 0 on error
978 * or if prime
979 * \return 0 if OK, 1 on error
980 */
981 l_ok
982 lept_isPrime(l_uint64 n,
983 l_int32 *pis_prime,
984 l_uint32 *pfactor)
985 {
986 l_uint32 div;
987 l_uint64 limit, ratio;
988
989 if (pis_prime) *pis_prime = 0;
990 if (pfactor) *pfactor = 0;
991 if (!pis_prime)
992 return ERROR_INT("&is_prime not defined", __func__, 1);
993 if (n <= 0)
994 return ERROR_INT("n must be > 0", __func__, 1);
995
996 if (n % 2 == 0) {
997 if (pfactor) *pfactor = 2;
998 return 0;
999 }
1000
1001 limit = (l_uint64)sqrt((l_float64)n);
1002 for (div = 3; div < limit; div += 2) {
1003 ratio = n / div;
1004 if (ratio * div == n) {
1005 if (pfactor) *pfactor = div;
1006 return 0;
1007 }
1008 }
1009
1010 *pis_prime = 1;
1011 return 0;
1012 }
1013
1014
1015 /*---------------------------------------------------------------------*
1016 * Gray code conversion *
1017 *---------------------------------------------------------------------*/
1018 /*!
1019 * \brief convertIntToGrayCode()
1020 *
1021 * \param[in] val integer value
1022 * \return corresponding gray code value
1023 *
1024 * <pre>
1025 * Notes:
1026 * (1) Gray code values corresponding to integers differ by
1027 * only one bit transition between successive integers.
1028 * </pre>
1029 */
1030 l_uint32
1031 convertIntToGrayCode(l_uint32 val)
1032 {
1033 return (val >> 1) ^ val;
1034 }
1035
1036
1037 /*!
1038 * \brief convertGrayCodeToInt()
1039 *
1040 * \param[in] val gray code value
1041 * \return corresponding integer value
1042 */
1043 l_uint32
1044 convertGrayCodeToInt(l_uint32 val)
1045 {
1046 l_uint32 shift;
1047
1048 for (shift = 1; shift < 32; shift <<= 1)
1049 val ^= val >> shift;
1050 return val;
1051 }
1052
1053
1054 /*---------------------------------------------------------------------*
1055 * Leptonica version number *
1056 *---------------------------------------------------------------------*/
1057 /*!
1058 * \brief getLeptonicaVersion()
1059 *
1060 * Return: string of version number (e.g., 'leptonica-1.74.2')
1061 *
1062 * Notes:
1063 * (1) The caller has responsibility to free the memory.
1064 */
1065 char *
1066 getLeptonicaVersion(void)
1067 {
1068 size_t bufsize = 100;
1069
1070 char *version = (char *)LEPT_CALLOC(bufsize, sizeof(char));
1071
1072 #ifdef _MSC_VER
1073 #ifdef _USRDLL
1074 char dllStr[] = "DLL";
1075 #else
1076 char dllStr[] = "LIB";
1077 #endif
1078 #ifdef _DEBUG
1079 char debugStr[] = "Debug";
1080 #else
1081 char debugStr[] = "Release";
1082 #endif
1083 #ifdef _M_IX86
1084 char bitStr[] = " x86";
1085 #elif _M_X64
1086 char bitStr[] = " x64";
1087 #else
1088 char bitStr[] = "";
1089 #endif
1090 snprintf(version, bufsize, "leptonica-%d.%d.%d (%s, %s) [MSC v.%d %s %s%s]",
1091 LIBLEPT_MAJOR_VERSION, LIBLEPT_MINOR_VERSION, LIBLEPT_PATCH_VERSION,
1092 __DATE__, __TIME__, _MSC_VER, dllStr, debugStr, bitStr);
1093
1094 #else
1095
1096 snprintf(version, bufsize, "leptonica-%d.%d.%d", LIBLEPT_MAJOR_VERSION,
1097 LIBLEPT_MINOR_VERSION, LIBLEPT_PATCH_VERSION);
1098
1099 #endif /* _MSC_VER */
1100 return version;
1101 }
1102
1103
1104 /*---------------------------------------------------------------------*
1105 * Timing procs *
1106 *---------------------------------------------------------------------*/
1107 #if !defined(_WIN32) && !defined(__Fuchsia__)
1108
1109 #include <sys/time.h>
1110 #include <sys/resource.h>
1111
1112 static struct rusage rusage_before;
1113 static struct rusage rusage_after;
1114
1115 /*!
1116 * \brief startTimer(), stopTimer()
1117 *
1118 * Notes:
1119 * (1) These measure the cpu time elapsed between the two calls:
1120 * startTimer();
1121 * ....
1122 * lept_stderr( "Elapsed time = %7.3f sec\n", stopTimer());
1123 */
1124 void
1125 startTimer(void)
1126 {
1127 getrusage(RUSAGE_SELF, &rusage_before);
1128 }
1129
1130 l_float32
1131 stopTimer(void)
1132 {
1133 l_int32 tsec, tusec;
1134
1135 getrusage(RUSAGE_SELF, &rusage_after);
1136
1137 tsec = rusage_after.ru_utime.tv_sec - rusage_before.ru_utime.tv_sec;
1138 tusec = rusage_after.ru_utime.tv_usec - rusage_before.ru_utime.tv_usec;
1139 return (tsec + ((l_float32)tusec) / 1000000.0);
1140 }
1141
1142
1143 /*!
1144 * \brief startTimerNested(), stopTimerNested()
1145 *
1146 * Example of usage:
1147 *
1148 * L_TIMER t1 = startTimerNested();
1149 * ....
1150 * L_TIMER t2 = startTimerNested();
1151 * ....
1152 * lept_stderr( "Elapsed time 2 = %7.3f sec\n", stopTimerNested(t2));
1153 * ....
1154 * lept_stderr( "Elapsed time 1 = %7.3f sec\n", stopTimerNested(t1));
1155 */
1156 L_TIMER
1157 startTimerNested(void)
1158 {
1159 struct rusage *rusage_start;
1160
1161 rusage_start = (struct rusage *)LEPT_CALLOC(1, sizeof(struct rusage));
1162 getrusage(RUSAGE_SELF, rusage_start);
1163 return rusage_start;
1164 }
1165
1166 l_float32
1167 stopTimerNested(L_TIMER rusage_start)
1168 {
1169 l_int32 tsec, tusec;
1170 struct rusage rusage_stop;
1171
1172 getrusage(RUSAGE_SELF, &rusage_stop);
1173
1174 tsec = rusage_stop.ru_utime.tv_sec -
1175 ((struct rusage *)rusage_start)->ru_utime.tv_sec;
1176 tusec = rusage_stop.ru_utime.tv_usec -
1177 ((struct rusage *)rusage_start)->ru_utime.tv_usec;
1178 LEPT_FREE(rusage_start);
1179 return (tsec + ((l_float32)tusec) / 1000000.0);
1180 }
1181
1182
1183 /*!
1184 * \brief l_getCurrentTime()
1185 *
1186 * \param[out] sec [optional] in seconds since birth of Unix
1187 * \param[out] usec [optional] in microseconds since birth of Unix
1188 * \return void
1189 */
1190 void
1191 l_getCurrentTime(l_int32 *sec,
1192 l_int32 *usec)
1193 {
1194 struct timeval tv;
1195
1196 gettimeofday(&tv, NULL);
1197 if (sec) *sec = (l_int32)tv.tv_sec;
1198 if (usec) *usec = (l_int32)tv.tv_usec;
1199 }
1200
1201 #elif defined(__Fuchsia__) /* resource.h not implemented on Fuchsia. */
1202
1203 /* Timer functions are used for testing and debugging, and
1204 * are stubbed out. If they are needed in the future, they
1205 * can be implemented in Fuchsia using the zircon syscall
1206 * zx_object_get_info() in ZX_INFOR_THREAD_STATS mode. */
1207 void
1208 startTimer(void)
1209 {
1210 }
1211
1212 l_float32
1213 stopTimer(void)
1214 {
1215 return 0.0;
1216 }
1217
1218 L_TIMER
1219 startTimerNested(void)
1220 {
1221 return NULL;
1222 }
1223
1224 l_float32
1225 stopTimerNested(L_TIMER rusage_start)
1226 {
1227 return 0.0;
1228 }
1229
1230 void
1231 l_getCurrentTime(l_int32 *sec,
1232 l_int32 *usec)
1233 {
1234 }
1235
1236 #else /* _WIN32 : resource.h not implemented under Windows */
1237
1238 /* Note: if division by 10^7 seems strange, the time is expressed
1239 * as the number of 100-nanosecond intervals that have elapsed
1240 * since 12:00 A.M. January 1, 1601. */
1241
1242 static ULARGE_INTEGER utime_before;
1243 static ULARGE_INTEGER utime_after;
1244
1245 void
1246 startTimer(void)
1247 {
1248 HANDLE this_process;
1249 FILETIME start, stop, kernel, user;
1250
1251 this_process = GetCurrentProcess();
1252
1253 GetProcessTimes(this_process, &start, &stop, &kernel, &user);
1254
1255 utime_before.LowPart = user.dwLowDateTime;
1256 utime_before.HighPart = user.dwHighDateTime;
1257 }
1258
1259 l_float32
1260 stopTimer(void)
1261 {
1262 HANDLE this_process;
1263 FILETIME start, stop, kernel, user;
1264 ULONGLONG hnsec; /* in units of hecto-nanosecond (100 ns) intervals */
1265
1266 this_process = GetCurrentProcess();
1267
1268 GetProcessTimes(this_process, &start, &stop, &kernel, &user);
1269
1270 utime_after.LowPart = user.dwLowDateTime;
1271 utime_after.HighPart = user.dwHighDateTime;
1272 hnsec = utime_after.QuadPart - utime_before.QuadPart;
1273 return (l_float32)(signed)hnsec / 10000000.0f;
1274 }
1275
1276 L_TIMER
1277 startTimerNested(void)
1278 {
1279 HANDLE this_process;
1280 FILETIME start, stop, kernel, user;
1281 ULARGE_INTEGER *utime_start;
1282
1283 this_process = GetCurrentProcess();
1284
1285 GetProcessTimes (this_process, &start, &stop, &kernel, &user);
1286
1287 utime_start = (ULARGE_INTEGER *)LEPT_CALLOC(1, sizeof(ULARGE_INTEGER));
1288 utime_start->LowPart = user.dwLowDateTime;
1289 utime_start->HighPart = user.dwHighDateTime;
1290 return utime_start;
1291 }
1292
1293 l_float32
1294 stopTimerNested(L_TIMER utime_start)
1295 {
1296 HANDLE this_process;
1297 FILETIME start, stop, kernel, user;
1298 ULARGE_INTEGER utime_stop;
1299 ULONGLONG hnsec; /* in units of 100 ns intervals */
1300
1301 this_process = GetCurrentProcess ();
1302
1303 GetProcessTimes (this_process, &start, &stop, &kernel, &user);
1304
1305 utime_stop.LowPart = user.dwLowDateTime;
1306 utime_stop.HighPart = user.dwHighDateTime;
1307 hnsec = utime_stop.QuadPart - ((ULARGE_INTEGER *)utime_start)->QuadPart;
1308 LEPT_FREE(utime_start);
1309 return (l_float32)(signed)hnsec / 10000000.0f;
1310 }
1311
1312 void
1313 l_getCurrentTime(l_int32 *sec,
1314 l_int32 *usec)
1315 {
1316 ULARGE_INTEGER utime, birthunix;
1317 FILETIME systemtime;
1318 LONGLONG birthunixhnsec = 116444736000000000; /*in units of 100 ns */
1319 LONGLONG usecs;
1320
1321 GetSystemTimeAsFileTime(&systemtime);
1322 utime.LowPart = systemtime.dwLowDateTime;
1323 utime.HighPart = systemtime.dwHighDateTime;
1324
1325 birthunix.LowPart = (DWORD) birthunixhnsec;
1326 birthunix.HighPart = birthunixhnsec >> 32;
1327
1328 usecs = (LONGLONG) ((utime.QuadPart - birthunix.QuadPart) / 10);
1329
1330 if (sec) *sec = (l_int32) (usecs / 1000000);
1331 if (usec) *usec = (l_int32) (usecs % 1000000);
1332 }
1333
1334 #endif
1335
1336
1337 /*!
1338 * \brief startWallTimer()
1339 *
1340 * \return walltimer-ptr
1341 *
1342 * <pre>
1343 * Notes:
1344 * (1) These measure the wall clock time elapsed between the two calls:
1345 * L_WALLTIMER *timer = startWallTimer();
1346 * ....
1347 * lept_stderr( "Elapsed time = %f sec\n", stopWallTimer(&timer);
1348 * (2) Note that the timer object is destroyed by stopWallTimer().
1349 * </pre>
1350 */
1351 L_WALLTIMER *
1352 startWallTimer(void)
1353 {
1354 L_WALLTIMER *timer;
1355
1356 timer = (L_WALLTIMER *)LEPT_CALLOC(1, sizeof(L_WALLTIMER));
1357 l_getCurrentTime(&timer->start_sec, &timer->start_usec);
1358 return timer;
1359 }
1360
1361 /*!
1362 * \brief stopWallTimer()
1363 *
1364 * \param[in,out] ptimer walltimer pointer
1365 * \return time wall time elapsed in seconds
1366 */
1367 l_float32
1368 stopWallTimer(L_WALLTIMER **ptimer)
1369 {
1370 l_int32 tsec, tusec;
1371 L_WALLTIMER *timer;
1372
1373 if (!ptimer)
1374 return (l_float32)ERROR_FLOAT("&timer not defined", __func__, 0.0);
1375 timer = *ptimer;
1376 if (!timer)
1377 return (l_float32)ERROR_FLOAT("timer not defined", __func__, 0.0);
1378
1379 l_getCurrentTime(&timer->stop_sec, &timer->stop_usec);
1380 tsec = timer->stop_sec - timer->start_sec;
1381 tusec = timer->stop_usec - timer->start_usec;
1382 LEPT_FREE(timer);
1383 *ptimer = NULL;
1384 return (tsec + ((l_float32)tusec) / 1000000.0f);
1385 }
1386
1387
1388 /*!
1389 * \brief l_getFormattedDate()
1390 *
1391 * \return formatted date string, or NULL on error
1392 *
1393 * <pre>
1394 * Notes:
1395 * (1) This is used in pdf, in the form specified in section 3.8.2 of
1396 * http://partners.adobe.com/public/developer/en/pdf/PDFReference.pdf
1397 * (2) Contributed by Dave Bryan. Works on all platforms.
1398 * </pre>
1399 */
1400 char *
1401 l_getFormattedDate(void)
1402 {
1403 char buf[128] = "", sep = 'Z';
1404 l_int32 gmt_offset, relh, relm;
1405 time_t ut, lt;
1406 struct tm Tm;
1407 struct tm *tptr = &Tm;
1408
1409 ut = time(NULL);
1410
1411 /* This generates a second "time_t" value by calling "gmtime" to
1412 fill in a "tm" structure expressed as UTC and then calling
1413 "mktime", which expects a "tm" structure expressed as the
1414 local time. The result is a value that is offset from the
1415 value returned by the "time" function by the local UTC offset.
1416 "tm_isdst" is set to -1 to tell "mktime" to determine for
1417 itself whether DST is in effect. This is necessary because
1418 "gmtime" always sets "tm_isdst" to 0, which would tell
1419 "mktime" to presume that DST is not in effect. */
1420 #ifdef _WIN32
1421 #ifdef _MSC_VER
1422 gmtime_s(tptr, &ut);
1423 #else /* mingw */
1424 tptr = gmtime(&ut);
1425 #endif
1426 #else
1427 gmtime_r(&ut, tptr);
1428 #endif
1429 tptr->tm_isdst = -1;
1430 lt = mktime(tptr);
1431
1432 /* Calls "difftime" to obtain the resulting difference in seconds,
1433 * because "time_t" is an opaque type, per the C standard. */
1434 gmt_offset = (l_int32) difftime(ut, lt);
1435 if (gmt_offset > 0)
1436 sep = '+';
1437 else if (gmt_offset < 0)
1438 sep = '-';
1439 relh = L_ABS(gmt_offset) / 3600;
1440 relm = (L_ABS(gmt_offset) % 3600) / 60;
1441
1442 #ifdef _WIN32
1443 #ifdef _MSC_VER
1444 localtime_s(tptr, &ut);
1445 #else /* mingw */
1446 tptr = localtime(&ut);
1447 #endif
1448 #else
1449 localtime_r(&ut, tptr);
1450 #endif
1451 strftime(buf, sizeof(buf), "%Y%m%d%H%M%S", tptr);
1452 sprintf(buf + 14, "%c%02d'%02d'", sep, relh, relm);
1453 return stringNew(buf);
1454 }