Mercurial > hgrepos > Python2 > PyMuPDF
view mupdf-source/thirdparty/leptonica/src/utils1.c @ 45:b74429b0f5c4 v1.26.5+1
+++++ v1.26.5+1
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Sat, 11 Oct 2025 17:17:13 +0200 |
| parents | b50eed0cc0ef |
| children |
line wrap: on
line source
/*====================================================================* - Copyright (C) 2001 Leptonica. All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - 2. Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following - disclaimer in the documentation and/or other materials - provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *====================================================================*/ /*! * \file utils1.c * <pre> * * ------------------------------------------ * This file has these utilities: * - error, warning and info messages * - redirection of stderr * - low-level endian conversions * - file corruption operations * - random and prime number operations * - 64-bit hash functions * - leptonica version number accessor * - timing and date operations * ------------------------------------------ * * Control of error, warning and info messages * l_int32 setMsgSeverity() * * Error return functions, invoked by macros * l_int32 returnErrorInt() * l_float32 returnErrorFloat() * void *returnErrorPtr() * l_int32 returnErrorInt1() * l_float32 returnErrorFloat1() * void *returnErrorPtr1() * * Runtime redirection of stderr * void leptSetStderrHandler() * void lept_stderr() * * Test files for equivalence * l_int32 filesAreIdentical() * * Byte-swapping data conversion * l_uint16 convertOnBigEnd16() * l_uint32 convertOnBigEnd32() * l_uint16 convertOnLittleEnd16() * l_uint32 convertOnLittleEnd32() * * File corruption and byte replacement operations * l_int32 fileCorruptByDeletion() * l_int32 fileCorruptByMutation() * l_int32 fileReplaceBytes() * * Generate random integer in given interval * l_int32 genRandomIntOnInterval() * * Simple math functions * l_int32 lept_roundftoi() * l_int32 lept_floor() * l_int32 lept_ceiling() * * 64-bit hash functions * l_int32 l_hashStringToUint64() * l_int32 l_hashStringToUint64Fast() * l_int32 l_hashPtToUint64() * l_int32 l_hashFloat64ToUint64() * * Prime finders * l_int32 findNextLargerPrime() * l_int32 lept_isPrime() * * Gray code conversion * l_uint32 convertIntToGrayCode() * l_uint32 convertGrayCodeToInt() * * Leptonica version number * char *getLeptonicaVersion() * * Timing * void startTimer() * l_float32 stopTimer() * L_TIMER startTimerNested() * l_float32 stopTimerNested() * void l_getCurrentTime() * L_WALLTIMER *startWallTimer() * l_float32 stopWallTimer() * void l_getFormattedDate() * * For all issues with cross-platform development, see utils2.c. * </pre> */ #ifdef HAVE_CONFIG_H #include <config_auto.h> #endif /* HAVE_CONFIG_H */ #ifdef _WIN32 #include <windows.h> #endif /* _WIN32 */ #include <time.h> #include "allheaders.h" #include <math.h> /* Global for controlling message output at runtime */ LEPT_DLL l_int32 LeptMsgSeverity = DEFAULT_SEVERITY; #define DEBUG_SEV 0 /*----------------------------------------------------------------------* * Control of error, warning and info messages * *----------------------------------------------------------------------*/ /*! * \brief setMsgSeverity() * * \param[in] newsev * \return oldsev * * <pre> * Notes: * (1) setMsgSeverity() allows the user to specify the desired * message severity threshold. Messages of equal or greater * severity will be output. The previous message severity is * returned when the new severity is set. * (2) If L_SEVERITY_EXTERNAL is passed, then the severity will be * obtained from the LEPT_MSG_SEVERITY environment variable. * </pre> */ l_int32 setMsgSeverity(l_int32 newsev) { l_int32 oldsev; char *envsev; oldsev = LeptMsgSeverity; if (newsev == L_SEVERITY_EXTERNAL) { envsev = getenv("LEPT_MSG_SEVERITY"); if (envsev) { LeptMsgSeverity = atoi(envsev); #if DEBUG_SEV L_INFO("message severity set to external\n", "setMsgSeverity"); #endif /* DEBUG_SEV */ } else { #if DEBUG_SEV L_WARNING("environment var LEPT_MSG_SEVERITY not defined\n", "setMsgSeverity"); #endif /* DEBUG_SEV */ } } else { LeptMsgSeverity = newsev; #if DEBUG_SEV L_INFO("message severity set to %d\n", "setMsgSeverity", newsev); #endif /* DEBUG_SEV */ } return oldsev; } /*----------------------------------------------------------------------* * Error return functions, invoked by macros * *----------------------------------------------------------------------* * * * (1) These error functions print messages to stderr and allow * * exit from the function that called them. * * (2) They must be invoked only by the three argument macros * * ERROR_INT, ERROR_FLOAT, ERROR_PTR * * or the four argument macros * * ERROR_INT_1, ERROR_FLOAT_1, ERROR_PTR_1 * * which are in environ.h. * * (3) The print output can be disabled at compile time, either * * by using -DNO_CONSOLE_IO or by setting LeptMsgSeverity. * *----------------------------------------------------------------------*/ /*! * \brief returnErrorInt() * * \param[in] msg error message * \param[in] procname use __func__ * \param[in] ival return error val (typically 1 for an error) * \return ival */ l_int32 returnErrorInt(const char *msg, const char *procname, l_int32 ival) { lept_stderr("Error in %s: %s\n", procname, msg); return ival; } /*! * \brief returnErrorFloat() * * \param[in] msg error message * \param[in] procname use __func__ * \param[in] fval return float error val * \return fval */ l_float32 returnErrorFloat(const char *msg, const char *procname, l_float32 fval) { lept_stderr("Error in %s: %s\n", procname, msg); return fval; } /*! * \brief returnErrorPtr() * * \param[in] msg error message * \param[in] procname use __func__ * \param[in] pval return error val (typically null for an error) * \return pval */ void * returnErrorPtr(const char *msg, const char *procname, void *pval) { lept_stderr("Error in %s: %s\n", procname, msg); return pval; } /*! * \brief returnErrorInt1() * * \param[in] msg error message * \param[in] arg additional error message argument * (will be appended to the error message) * \param[in] procname use __func__ * \param[in] ival return error val; typically 1 for an error return * \return ival typically 1 for an error return */ l_int32 returnErrorInt1(const char *msg, const char *arg, const char *procname, l_int32 ival) { lept_stderr("Leptonica Error in %s: %s: %s\n", procname, msg, arg); return ival; } /*! * \brief returnErrorFloat1() * * \param[in] msg error message * \param[in] arg additional error message argument * (will be appended to the error message) * \param[in] procname use __func__ * \param[in] fval return float error val * \return fval */ l_float32 returnErrorFloat1(const char *msg, const char *arg, const char *procname, l_float32 fval) { lept_stderr("Leptonica Error in %s: %s: %s\n", procname, msg, arg); return fval; } /*! * \brief returnErrorPtr1() * * \param[in] msg error message * \param[in] arg additional error message argument * (will be appended to the error message) * \param[in] procname use __func__ * \param[in] pval return error val (typically null for an error) * \return pval */ void * returnErrorPtr1(const char *msg, const char *arg, const char *procname, void *pval) { lept_stderr("Leptonica Error in %s: %s: %s\n", procname, msg, arg); return pval; } /*------------------------------------------------------------------------* * Runtime redirection of stderr * *------------------------------------------------------------------------* * * * The user can provide a callback function to redirect messages * * that would otherwise go to stderr. Here are two examples: * * (1) to stop all messages: * * void send_to_devnull(const char *msg) {} * * (2) to write to the system logger: * * void send_to_syslog(const char *msg) { * * syslog(1, msg); * * } * * These would then be registered using * * leptSetStderrHandler(send_to_devnull); * * and * * leptSetStderrHandler(send_to_syslog); * *------------------------------------------------------------------------*/ /* By default, all messages go to stderr */ static void lept_default_stderr_handler(const char *formatted_msg) { if (formatted_msg) fputs(formatted_msg, stderr); } /* The stderr callback handler is private to leptonica. * By default it writes to stderr. */ void (*stderr_handler)(const char *) = lept_default_stderr_handler; /*! * \brief leptSetStderrHandler() * * \param[in] handler callback function for lept_stderr output * \return void * * <pre> * Notes: * (1) This registers a handler for redirection of output to stderr * at runtime. * (2) If called with NULL, the output goes to stderr. * </pre> */ void leptSetStderrHandler(void (*handler)(const char *)) { if (handler) stderr_handler = handler; else stderr_handler = lept_default_stderr_handler; } #define MAX_DEBUG_MESSAGE 2000 /*! * \brief lept_stderr() * * \param[in] fmt format string * \param[in] ... varargs * \return void * * <pre> * Notes: * (1) This is a replacement for fprintf(), to allow redirection * of output. All calls to fprintf(stderr, ...) are replaced * with calls to lept_stderr(...). * (2) The message size is limited to 2K bytes. (3) This utility was provided by jbarlow83. * </pre> */ void lept_stderr(const char *fmt, ...) { va_list args; char msg[MAX_DEBUG_MESSAGE]; l_int32 n; va_start(args, fmt); n = vsnprintf(msg, sizeof(msg), fmt, args); va_end(args); if (n < 0) return; (*stderr_handler)(msg); } /*--------------------------------------------------------------------* * Test files for equivalence * *--------------------------------------------------------------------*/ /*! * \brief filesAreIdentical() * * \param[in] fname1 * \param[in] fname2 * \param[out] psame 1 if identical; 0 if different * \return 0 if OK, 1 on error */ l_ok filesAreIdentical(const char *fname1, const char *fname2, l_int32 *psame) { l_int32 i, same; size_t nbytes1, nbytes2; l_uint8 *array1, *array2; if (!psame) return ERROR_INT("&same not defined", __func__, 1); *psame = 0; if (!fname1 || !fname2) return ERROR_INT("both names not defined", __func__, 1); nbytes1 = nbytesInFile(fname1); nbytes2 = nbytesInFile(fname2); if (nbytes1 != nbytes2) return 0; if ((array1 = l_binaryRead(fname1, &nbytes1)) == NULL) return ERROR_INT("array1 not read", __func__, 1); if ((array2 = l_binaryRead(fname2, &nbytes2)) == NULL) { LEPT_FREE(array1); return ERROR_INT("array2 not read", __func__, 1); } same = 1; for (i = 0; i < nbytes1; i++) { if (array1[i] != array2[i]) { same = 0; break; } } LEPT_FREE(array1); LEPT_FREE(array2); *psame = same; return 0; } /*--------------------------------------------------------------------------* * 16 and 32 bit byte-swapping on big endian and little endian machines * *--------------------------------------------------------------------------* * * * These are typically used for I/O conversions: * * (1) endian conversion for data that was read from a file * * (2) endian conversion on data before it is written to a file * *--------------------------------------------------------------------------*/ /*--------------------------------------------------------------------* * 16-bit byte swapping * *--------------------------------------------------------------------*/ #ifdef L_BIG_ENDIAN l_uint16 convertOnBigEnd16(l_uint16 shortin) { return ((shortin << 8) | (shortin >> 8)); } l_uint16 convertOnLittleEnd16(l_uint16 shortin) { return shortin; } #else /* L_LITTLE_ENDIAN */ l_uint16 convertOnLittleEnd16(l_uint16 shortin) { return ((shortin << 8) | (shortin >> 8)); } l_uint16 convertOnBigEnd16(l_uint16 shortin) { return shortin; } #endif /* L_BIG_ENDIAN */ /*--------------------------------------------------------------------* * 32-bit byte swapping * *--------------------------------------------------------------------*/ #ifdef L_BIG_ENDIAN l_uint32 convertOnBigEnd32(l_uint32 wordin) { return ((wordin << 24) | ((wordin << 8) & 0x00ff0000) | ((wordin >> 8) & 0x0000ff00) | (wordin >> 24)); } l_uint32 convertOnLittleEnd32(l_uint32 wordin) { return wordin; } #else /* L_LITTLE_ENDIAN */ l_uint32 convertOnLittleEnd32(l_uint32 wordin) { return ((wordin << 24) | ((wordin << 8) & 0x00ff0000) | ((wordin >> 8) & 0x0000ff00) | (wordin >> 24)); } l_uint32 convertOnBigEnd32(l_uint32 wordin) { return wordin; } #endif /* L_BIG_ENDIAN */ /*---------------------------------------------------------------------* * File corruption and byte replacement operations * *---------------------------------------------------------------------*/ /*! * \brief fileCorruptByDeletion() * * \param[in] filein * \param[in] loc fractional location of start of deletion * \param[in] size fractional size of deletion * \param[in] fileout corrupted file * \return 0 if OK, 1 on error * * <pre> * Notes: * (1) %loc and %size are expressed as a fraction of the file size. * (2) This makes a copy of the data in %filein, where bytes in the * specified region have deleted. * (3) If (%loc + %size) >= 1.0, this deletes from the position * represented by %loc to the end of the file. * (4) It is useful for testing robustness of I/O wrappers when the * data is corrupted, by simulating data corruption by deletion. * </pre> */ l_ok fileCorruptByDeletion(const char *filein, l_float32 loc, l_float32 size, const char *fileout) { l_int32 i, locb, sizeb, rembytes; size_t inbytes, outbytes; l_uint8 *datain, *dataout; if (!filein || !fileout) return ERROR_INT("filein and fileout not both specified", __func__, 1); if (loc < 0.0 || loc >= 1.0) return ERROR_INT("loc must be in [0.0 ... 1.0)", __func__, 1); if (size <= 0.0) return ERROR_INT("size must be > 0.0", __func__, 1); if (loc + size > 1.0) size = 1.0f - loc; datain = l_binaryRead(filein, &inbytes); locb = (l_int32)(loc * inbytes + 0.5); locb = L_MIN(locb, inbytes - 1); sizeb = (l_int32)(size * inbytes + 0.5); sizeb = L_MAX(1, sizeb); sizeb = L_MIN(sizeb, inbytes - locb); /* >= 1 */ L_INFO("Removed %d bytes at location %d\n", __func__, sizeb, locb); rembytes = inbytes - locb - sizeb; /* >= 0; to be copied, after excision */ outbytes = inbytes - sizeb; dataout = (l_uint8 *)LEPT_CALLOC(outbytes, 1); for (i = 0; i < locb; i++) dataout[i] = datain[i]; for (i = 0; i < rembytes; i++) dataout[locb + i] = datain[locb + sizeb + i]; l_binaryWrite(fileout, "w", dataout, outbytes); LEPT_FREE(datain); LEPT_FREE(dataout); return 0; } /*! * \brief fileCorruptByMutation() * * \param[in] filein * \param[in] loc fractional location of start of randomization * \param[in] size fractional size of randomization * \param[in] fileout corrupted file * \return 0 if OK, 1 on error * * <pre> * Notes: * (1) %loc and %size are expressed as a fraction of the file size. * (2) This makes a copy of the data in %filein, where bytes in the * specified region have been replaced by random data. * (3) If (%loc + %size) >= 1.0, this modifies data from the position * represented by %loc to the end of the file. * (4) It is useful for testing robustness of I/O wrappers when the * data is corrupted, by simulating data corruption. * </pre> */ l_ok fileCorruptByMutation(const char *filein, l_float32 loc, l_float32 size, const char *fileout) { l_int32 i, locb, sizeb; size_t bytes; l_uint8 *data; if (!filein || !fileout) return ERROR_INT("filein and fileout not both specified", __func__, 1); if (loc < 0.0 || loc >= 1.0) return ERROR_INT("loc must be in [0.0 ... 1.0)", __func__, 1); if (size <= 0.0) return ERROR_INT("size must be > 0.0", __func__, 1); if (loc + size > 1.0) size = 1.0f - loc; data = l_binaryRead(filein, &bytes); locb = (l_int32)(loc * bytes + 0.5); locb = L_MIN(locb, bytes - 1); sizeb = (l_int32)(size * bytes + 0.5); sizeb = L_MAX(1, sizeb); sizeb = L_MIN(sizeb, bytes - locb); /* >= 1 */ L_INFO("Randomizing %d bytes at location %d\n", __func__, sizeb, locb); /* Make an array of random bytes and do the substitution */ for (i = 0; i < sizeb; i++) { data[locb + i] = (l_uint8)(255.9 * ((l_float64)rand() / (l_float64)RAND_MAX)); } l_binaryWrite(fileout, "w", data, bytes); LEPT_FREE(data); return 0; } /*! * \brief fileReplaceBytes() * * \param[in] filein input file * \param[in] start start location for replacement * \param[in] nbytes number of bytes to be removed * \param[in] newdata replacement bytes * \param[in] newsize size of replacement bytes * \param[in] fileout output file * \return 0 if OK, 1 on error * * <pre> * Notes: * (1) To remove %nbytes without replacement, set %newdata == NULL. * (2) One use is for replacing the date/time in a pdf file by a * string of 12 '0's, effectively removing the date without * invalidating the byte counters in the pdf file: * fileReplaceBytes(filein 86 12 (char *)"000000000000" 12 fileout * </pre> */ l_ok fileReplaceBytes(const char *filein, l_int32 start, l_int32 nbytes, l_uint8 *newdata, size_t newsize, const char *fileout) { l_int32 i, index; size_t inbytes, outbytes; l_uint8 *datain, *dataout; if (!filein || !fileout) return ERROR_INT("filein and fileout not both specified", __func__, 1); datain = l_binaryRead(filein, &inbytes); if (start + nbytes > inbytes) L_WARNING("start + nbytes > length(filein) = %zu\n", __func__, inbytes); if (!newdata) newsize = 0; outbytes = inbytes - nbytes + newsize; if ((dataout = (l_uint8 *)LEPT_CALLOC(outbytes, 1)) == NULL) { LEPT_FREE(datain); return ERROR_INT("calloc fail for dataout", __func__, 1); } for (i = 0; i < start; i++) dataout[i] = datain[i]; for (i = start; i < start + newsize; i++) dataout[i] = newdata[i - start]; index = start + nbytes; /* for datain */ start += newsize; /* for dataout */ for (i = start; i < outbytes; i++, index++) dataout[i] = datain[index]; l_binaryWrite(fileout, "w", dataout, outbytes); LEPT_FREE(datain); LEPT_FREE(dataout); return 0; } /*---------------------------------------------------------------------* * Generate random integer in given interval * *---------------------------------------------------------------------*/ /*! * \brief genRandomIntOnInterval() * * \param[in] start beginning of interval; can be < 0 * \param[in] end end of interval; must be >= start * \param[in] seed use 0 to skip; otherwise call srand * \param[out] pval random integer in interval [start ... end] * \return 0 if OK, 1 on error */ l_ok genRandomIntOnInterval(l_int32 start, l_int32 end, l_int32 seed, l_int32 *pval) { l_float64 range; if (!pval) return ERROR_INT("&val not defined", __func__, 1); *pval = 0; if (end < start) return ERROR_INT("invalid range", __func__, 1); if (seed > 0) srand(seed); range = (l_float64)(end - start + 1); *pval = start + (l_int32)((l_float64)range * ((l_float64)rand() / (l_float64)RAND_MAX)); return 0; } /*---------------------------------------------------------------------* * Simple math functions * *---------------------------------------------------------------------*/ /*! * \brief lept_roundftoi() * * \param[in] fval * \return value rounded to the nearest integer * * <pre> * Notes: * (1) For fval >= 0, fval --> round(fval) == floor(fval + 0.5) * For fval < 0, fval --> -round(-fval)) * This is symmetric around 0. * e.g., for fval in (-0.5 ... 0.5), fval --> 0 * </pre> */ l_int32 lept_roundftoi(l_float32 fval) { return (fval >= 0.0) ? (l_int32)(fval + 0.5) : (l_int32)(fval - 0.5); } /*! * \brief lept_floor() * * \param[in] fval * \return largest integer that is not greater than %fval */ l_int32 lept_floor(l_float32 fval) { return (fval >= 0.0) ? (l_int32)(fval) : -(l_int32)(-fval); } /*! * \brief lept_ceiling() * * \param[in] fval * \return smallest integer that is not less than %fval * * <pre> * Notes: * (1) If fval is equal to its interger value, return that. * Otherwise: * For fval > 0, fval --> 1 + floor(fval) * For fval < 0, fval --> -(1 + floor(-fval)) * </pre> */ l_int32 lept_ceiling(l_float32 fval) { return (fval == (l_int32)fval ? (l_int32)fval : fval > 0.0 ? 1 + (l_int32)(fval) : -(1 + (l_int32)(-fval))); } /*---------------------------------------------------------------------* * 64-bit hash functions * *---------------------------------------------------------------------*/ /*! * \brief l_hashStringToUint64() * * \param[in] str * \param[out] phash hash value * \return 0 if OK, 1 on error * * <pre> * Notes: * (1) The intent of the hash is to avoid collisions by mapping * the string as randomly as possible into 64 bits. * (2) To the extent that the hashes are random, the probability of * a collision can be approximated by the square of the number * of strings divided by 2^64. For 1 million strings, the * collision probability is about 1 in 16 million. * (3) I expect non-randomness of the distribution to be most evident * for small text strings. This hash function has been tested * for all 5-character text strings composed of 26 letters, * of which there are 26^5 = 12356630. There are no hash * collisions for this set. * </pre> */ l_ok l_hashStringToUint64(const char *str, l_uint64 *phash) { l_uint64 hash, mulp; if (phash) *phash = 0; if (!str || (str[0] == '\0')) return ERROR_INT("str not defined or empty", __func__, 1); if (!phash) return ERROR_INT("&hash not defined", __func__, 1); mulp = 26544357894361247; /* prime, about 1/700 of the max uint64 */ hash = 104395301; while (*str) { hash += (*str++ * mulp) ^ (hash >> 7); /* shift [1...23] are ok */ } *phash = hash ^ (hash << 37); return 0; } /*! * \brief l_hashStringToUint64Fast() * * \param[in] str * \param[out] phash hash value * \return 0 if OK, 1 on error * * <pre> * Notes: * (1) This very simple hash algorithm is described in "The Practice * of Programming" by Kernighan and Pike, p. 57 (1999). * (2) The returned hash value would then be hashed into an index into * the hashtable, using the mod operator with the hashtable size. * </pre> */ l_ok l_hashStringToUint64Fast(const char *str, l_uint64 *phash) { l_uint64 h; l_uint8 *p; if (phash) *phash = 0; if (!str || (str[0] == '\0')) return ERROR_INT("str not defined or empty", __func__, 1); if (!phash) return ERROR_INT("&hash not defined", __func__, 1); h = 0; for (p = (l_uint8 *)str; *p != '\0'; p++) h = 37 * h + *p; /* 37 is good prime number for this */ *phash = h; return 0; } /*! * \brief l_hashPtToUint64() * * \param[in] x, y * \param[out] phash hash value * \return 0 if OK, 1 on error * * <pre> * Notes: * (1) This simple hash function has no collisions for * any of 400 million points with x and y up to 20000. * </pre> */ l_ok l_hashPtToUint64(l_int32 x, l_int32 y, l_uint64 *phash) { if (!phash) return ERROR_INT("&hash not defined", __func__, 1); *phash = (l_uint64)(2173249142.3849 * x + 3763193258.6227 * y); return 0; } /*! * \brief l_hashFloat64ToUint64() * * \param[in] val * \param[out] phash hash key * \return 0 if OK, 1 on error * * <pre> * Notes: * (1) This is a simple hash for using hashmaps with 64-bit float data. * (2) The resulting hash is called a "key" in a lookup operation. * The bucket for %val in a hashmap is then found by taking the mod * of the hash key with the number of buckets (which is prime). * </pre> */ l_ok l_hashFloat64ToUint64(l_float64 val, l_uint64 *phash) { if (!phash) return ERROR_INT("&hash not defined", __func__, 1); val = (val >= 0.0) ? 847019.66701 * val : -217324.91613 * val; *phash = (l_uint64)val; return 0; } /*---------------------------------------------------------------------* * Prime finders * *---------------------------------------------------------------------*/ /*! * \brief findNextLargerPrime() * * \param[in] start * \param[out] pprime first prime larger than %start * \return 0 if OK, 1 on error */ l_ok findNextLargerPrime(l_int32 start, l_uint32 *pprime) { l_int32 i, is_prime; if (!pprime) return ERROR_INT("&prime not defined", __func__, 1); *pprime = 0; if (start <= 0) return ERROR_INT("start must be > 0", __func__, 1); for (i = start + 1; ; i++) { lept_isPrime(i, &is_prime, NULL); if (is_prime) { *pprime = i; return 0; } } return ERROR_INT("prime not found!", __func__, 1); } /*! * \brief lept_isPrime() * * \param[in] n 64-bit unsigned * \param[out] pis_prime 1 if prime, 0 otherwise * \param[out] pfactor [optional] smallest divisor, or 0 on error * or if prime * \return 0 if OK, 1 on error */ l_ok lept_isPrime(l_uint64 n, l_int32 *pis_prime, l_uint32 *pfactor) { l_uint32 div; l_uint64 limit, ratio; if (pis_prime) *pis_prime = 0; if (pfactor) *pfactor = 0; if (!pis_prime) return ERROR_INT("&is_prime not defined", __func__, 1); if (n <= 0) return ERROR_INT("n must be > 0", __func__, 1); if (n % 2 == 0) { if (pfactor) *pfactor = 2; return 0; } limit = (l_uint64)sqrt((l_float64)n); for (div = 3; div < limit; div += 2) { ratio = n / div; if (ratio * div == n) { if (pfactor) *pfactor = div; return 0; } } *pis_prime = 1; return 0; } /*---------------------------------------------------------------------* * Gray code conversion * *---------------------------------------------------------------------*/ /*! * \brief convertIntToGrayCode() * * \param[in] val integer value * \return corresponding gray code value * * <pre> * Notes: * (1) Gray code values corresponding to integers differ by * only one bit transition between successive integers. * </pre> */ l_uint32 convertIntToGrayCode(l_uint32 val) { return (val >> 1) ^ val; } /*! * \brief convertGrayCodeToInt() * * \param[in] val gray code value * \return corresponding integer value */ l_uint32 convertGrayCodeToInt(l_uint32 val) { l_uint32 shift; for (shift = 1; shift < 32; shift <<= 1) val ^= val >> shift; return val; } /*---------------------------------------------------------------------* * Leptonica version number * *---------------------------------------------------------------------*/ /*! * \brief getLeptonicaVersion() * * Return: string of version number (e.g., 'leptonica-1.74.2') * * Notes: * (1) The caller has responsibility to free the memory. */ char * getLeptonicaVersion(void) { size_t bufsize = 100; char *version = (char *)LEPT_CALLOC(bufsize, sizeof(char)); #ifdef _MSC_VER #ifdef _USRDLL char dllStr[] = "DLL"; #else char dllStr[] = "LIB"; #endif #ifdef _DEBUG char debugStr[] = "Debug"; #else char debugStr[] = "Release"; #endif #ifdef _M_IX86 char bitStr[] = " x86"; #elif _M_X64 char bitStr[] = " x64"; #else char bitStr[] = ""; #endif snprintf(version, bufsize, "leptonica-%d.%d.%d (%s, %s) [MSC v.%d %s %s%s]", LIBLEPT_MAJOR_VERSION, LIBLEPT_MINOR_VERSION, LIBLEPT_PATCH_VERSION, __DATE__, __TIME__, _MSC_VER, dllStr, debugStr, bitStr); #else snprintf(version, bufsize, "leptonica-%d.%d.%d", LIBLEPT_MAJOR_VERSION, LIBLEPT_MINOR_VERSION, LIBLEPT_PATCH_VERSION); #endif /* _MSC_VER */ return version; } /*---------------------------------------------------------------------* * Timing procs * *---------------------------------------------------------------------*/ #if !defined(_WIN32) && !defined(__Fuchsia__) #include <sys/time.h> #include <sys/resource.h> static struct rusage rusage_before; static struct rusage rusage_after; /*! * \brief startTimer(), stopTimer() * * Notes: * (1) These measure the cpu time elapsed between the two calls: * startTimer(); * .... * lept_stderr( "Elapsed time = %7.3f sec\n", stopTimer()); */ void startTimer(void) { getrusage(RUSAGE_SELF, &rusage_before); } l_float32 stopTimer(void) { l_int32 tsec, tusec; getrusage(RUSAGE_SELF, &rusage_after); tsec = rusage_after.ru_utime.tv_sec - rusage_before.ru_utime.tv_sec; tusec = rusage_after.ru_utime.tv_usec - rusage_before.ru_utime.tv_usec; return (tsec + ((l_float32)tusec) / 1000000.0); } /*! * \brief startTimerNested(), stopTimerNested() * * Example of usage: * * L_TIMER t1 = startTimerNested(); * .... * L_TIMER t2 = startTimerNested(); * .... * lept_stderr( "Elapsed time 2 = %7.3f sec\n", stopTimerNested(t2)); * .... * lept_stderr( "Elapsed time 1 = %7.3f sec\n", stopTimerNested(t1)); */ L_TIMER startTimerNested(void) { struct rusage *rusage_start; rusage_start = (struct rusage *)LEPT_CALLOC(1, sizeof(struct rusage)); getrusage(RUSAGE_SELF, rusage_start); return rusage_start; } l_float32 stopTimerNested(L_TIMER rusage_start) { l_int32 tsec, tusec; struct rusage rusage_stop; getrusage(RUSAGE_SELF, &rusage_stop); tsec = rusage_stop.ru_utime.tv_sec - ((struct rusage *)rusage_start)->ru_utime.tv_sec; tusec = rusage_stop.ru_utime.tv_usec - ((struct rusage *)rusage_start)->ru_utime.tv_usec; LEPT_FREE(rusage_start); return (tsec + ((l_float32)tusec) / 1000000.0); } /*! * \brief l_getCurrentTime() * * \param[out] sec [optional] in seconds since birth of Unix * \param[out] usec [optional] in microseconds since birth of Unix * \return void */ void l_getCurrentTime(l_int32 *sec, l_int32 *usec) { struct timeval tv; gettimeofday(&tv, NULL); if (sec) *sec = (l_int32)tv.tv_sec; if (usec) *usec = (l_int32)tv.tv_usec; } #elif defined(__Fuchsia__) /* resource.h not implemented on Fuchsia. */ /* Timer functions are used for testing and debugging, and * are stubbed out. If they are needed in the future, they * can be implemented in Fuchsia using the zircon syscall * zx_object_get_info() in ZX_INFOR_THREAD_STATS mode. */ void startTimer(void) { } l_float32 stopTimer(void) { return 0.0; } L_TIMER startTimerNested(void) { return NULL; } l_float32 stopTimerNested(L_TIMER rusage_start) { return 0.0; } void l_getCurrentTime(l_int32 *sec, l_int32 *usec) { } #else /* _WIN32 : resource.h not implemented under Windows */ /* Note: if division by 10^7 seems strange, the time is expressed * as the number of 100-nanosecond intervals that have elapsed * since 12:00 A.M. January 1, 1601. */ static ULARGE_INTEGER utime_before; static ULARGE_INTEGER utime_after; void startTimer(void) { HANDLE this_process; FILETIME start, stop, kernel, user; this_process = GetCurrentProcess(); GetProcessTimes(this_process, &start, &stop, &kernel, &user); utime_before.LowPart = user.dwLowDateTime; utime_before.HighPart = user.dwHighDateTime; } l_float32 stopTimer(void) { HANDLE this_process; FILETIME start, stop, kernel, user; ULONGLONG hnsec; /* in units of hecto-nanosecond (100 ns) intervals */ this_process = GetCurrentProcess(); GetProcessTimes(this_process, &start, &stop, &kernel, &user); utime_after.LowPart = user.dwLowDateTime; utime_after.HighPart = user.dwHighDateTime; hnsec = utime_after.QuadPart - utime_before.QuadPart; return (l_float32)(signed)hnsec / 10000000.0f; } L_TIMER startTimerNested(void) { HANDLE this_process; FILETIME start, stop, kernel, user; ULARGE_INTEGER *utime_start; this_process = GetCurrentProcess(); GetProcessTimes (this_process, &start, &stop, &kernel, &user); utime_start = (ULARGE_INTEGER *)LEPT_CALLOC(1, sizeof(ULARGE_INTEGER)); utime_start->LowPart = user.dwLowDateTime; utime_start->HighPart = user.dwHighDateTime; return utime_start; } l_float32 stopTimerNested(L_TIMER utime_start) { HANDLE this_process; FILETIME start, stop, kernel, user; ULARGE_INTEGER utime_stop; ULONGLONG hnsec; /* in units of 100 ns intervals */ this_process = GetCurrentProcess (); GetProcessTimes (this_process, &start, &stop, &kernel, &user); utime_stop.LowPart = user.dwLowDateTime; utime_stop.HighPart = user.dwHighDateTime; hnsec = utime_stop.QuadPart - ((ULARGE_INTEGER *)utime_start)->QuadPart; LEPT_FREE(utime_start); return (l_float32)(signed)hnsec / 10000000.0f; } void l_getCurrentTime(l_int32 *sec, l_int32 *usec) { ULARGE_INTEGER utime, birthunix; FILETIME systemtime; LONGLONG birthunixhnsec = 116444736000000000; /*in units of 100 ns */ LONGLONG usecs; GetSystemTimeAsFileTime(&systemtime); utime.LowPart = systemtime.dwLowDateTime; utime.HighPart = systemtime.dwHighDateTime; birthunix.LowPart = (DWORD) birthunixhnsec; birthunix.HighPart = birthunixhnsec >> 32; usecs = (LONGLONG) ((utime.QuadPart - birthunix.QuadPart) / 10); if (sec) *sec = (l_int32) (usecs / 1000000); if (usec) *usec = (l_int32) (usecs % 1000000); } #endif /*! * \brief startWallTimer() * * \return walltimer-ptr * * <pre> * Notes: * (1) These measure the wall clock time elapsed between the two calls: * L_WALLTIMER *timer = startWallTimer(); * .... * lept_stderr( "Elapsed time = %f sec\n", stopWallTimer(&timer); * (2) Note that the timer object is destroyed by stopWallTimer(). * </pre> */ L_WALLTIMER * startWallTimer(void) { L_WALLTIMER *timer; timer = (L_WALLTIMER *)LEPT_CALLOC(1, sizeof(L_WALLTIMER)); l_getCurrentTime(&timer->start_sec, &timer->start_usec); return timer; } /*! * \brief stopWallTimer() * * \param[in,out] ptimer walltimer pointer * \return time wall time elapsed in seconds */ l_float32 stopWallTimer(L_WALLTIMER **ptimer) { l_int32 tsec, tusec; L_WALLTIMER *timer; if (!ptimer) return (l_float32)ERROR_FLOAT("&timer not defined", __func__, 0.0); timer = *ptimer; if (!timer) return (l_float32)ERROR_FLOAT("timer not defined", __func__, 0.0); l_getCurrentTime(&timer->stop_sec, &timer->stop_usec); tsec = timer->stop_sec - timer->start_sec; tusec = timer->stop_usec - timer->start_usec; LEPT_FREE(timer); *ptimer = NULL; return (tsec + ((l_float32)tusec) / 1000000.0f); } /*! * \brief l_getFormattedDate() * * \return formatted date string, or NULL on error * * <pre> * Notes: * (1) This is used in pdf, in the form specified in section 3.8.2 of * http://partners.adobe.com/public/developer/en/pdf/PDFReference.pdf * (2) Contributed by Dave Bryan. Works on all platforms. * </pre> */ char * l_getFormattedDate(void) { char buf[128] = "", sep = 'Z'; l_int32 gmt_offset, relh, relm; time_t ut, lt; struct tm Tm; struct tm *tptr = &Tm; ut = time(NULL); /* This generates a second "time_t" value by calling "gmtime" to fill in a "tm" structure expressed as UTC and then calling "mktime", which expects a "tm" structure expressed as the local time. The result is a value that is offset from the value returned by the "time" function by the local UTC offset. "tm_isdst" is set to -1 to tell "mktime" to determine for itself whether DST is in effect. This is necessary because "gmtime" always sets "tm_isdst" to 0, which would tell "mktime" to presume that DST is not in effect. */ #ifdef _WIN32 #ifdef _MSC_VER gmtime_s(tptr, &ut); #else /* mingw */ tptr = gmtime(&ut); #endif #else gmtime_r(&ut, tptr); #endif tptr->tm_isdst = -1; lt = mktime(tptr); /* Calls "difftime" to obtain the resulting difference in seconds, * because "time_t" is an opaque type, per the C standard. */ gmt_offset = (l_int32) difftime(ut, lt); if (gmt_offset > 0) sep = '+'; else if (gmt_offset < 0) sep = '-'; relh = L_ABS(gmt_offset) / 3600; relm = (L_ABS(gmt_offset) % 3600) / 60; #ifdef _WIN32 #ifdef _MSC_VER localtime_s(tptr, &ut); #else /* mingw */ tptr = localtime(&ut); #endif #else localtime_r(&ut, tptr); #endif strftime(buf, sizeof(buf), "%Y%m%d%H%M%S", tptr); sprintf(buf + 14, "%c%02d'%02d'", sep, relh, relm); return stringNew(buf); }
