Mercurial > hgrepos > Python2 > PyMuPDF

diff mupdf-source/thirdparty/leptonica/src/utils1.c @ 3:2c135c81b16c
MERGE: upstream PyMuPDF 1.26.4 with MuPDF 1.26.7
author: Franz Glasner <fzglas.hg@dom66.de>
date: Mon, 15 Sep 2025 11:44:09 +0200
parents: b50eed0cc0ef
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mupdf-source/thirdparty/leptonica/src/utils1.c	Mon Sep 15 11:44:09 2025 +0200
@@ -0,0 +1,1454 @@
+/*====================================================================*
+ -  Copyright (C) 2001 Leptonica.  All rights reserved.
+ -
+ -  Redistribution and use in source and binary forms, with or without
+ -  modification, are permitted provided that the following conditions
+ -  are met:
+ -  1. Redistributions of source code must retain the above copyright
+ -     notice, this list of conditions and the following disclaimer.
+ -  2. Redistributions in binary form must reproduce the above
+ -     copyright notice, this list of conditions and the following
+ -     disclaimer in the documentation and/or other materials
+ -     provided with the distribution.
+ -
+ -  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ -  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ -  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ -  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL ANY
+ -  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ -  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ -  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ -  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ -  OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ -  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ -  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *====================================================================*/
+
+/*!
+ * \file utils1.c
+ * <pre>
+ *
+ *       ------------------------------------------
+ *       This file has these utilities:
+ *         - error, warning and info messages
+ *         - redirection of stderr
+ *         - low-level endian conversions
+ *         - file corruption operations
+ *         - random and prime number operations
+ *         - 64-bit hash functions
+ *         - leptonica version number accessor
+ *         - timing and date operations
+ *       ------------------------------------------
+ *
+ *       Control of error, warning and info messages
+ *           l_int32    setMsgSeverity()
+ *
+ *       Error return functions, invoked by macros
+ *           l_int32    returnErrorInt()
+ *           l_float32  returnErrorFloat()
+ *           void      *returnErrorPtr()
+ *           l_int32    returnErrorInt1()
+ *           l_float32  returnErrorFloat1()
+ *           void      *returnErrorPtr1()
+ *
+ *       Runtime redirection of stderr
+ *           void leptSetStderrHandler()
+ *           void lept_stderr()
+ *
+ *       Test files for equivalence
+ *           l_int32    filesAreIdentical()
+ *
+ *       Byte-swapping data conversion
+ *           l_uint16   convertOnBigEnd16()
+ *           l_uint32   convertOnBigEnd32()
+ *           l_uint16   convertOnLittleEnd16()
+ *           l_uint32   convertOnLittleEnd32()
+ *
+ *       File corruption and byte replacement operations
+ *           l_int32    fileCorruptByDeletion()
+ *           l_int32    fileCorruptByMutation()
+ *           l_int32    fileReplaceBytes()
+ *
+ *       Generate random integer in given interval
+ *           l_int32    genRandomIntOnInterval()
+ *
+ *       Simple math functions
+ *           l_int32    lept_roundftoi()
+ *           l_int32    lept_floor()
+ *           l_int32    lept_ceiling()
+ *
+ *       64-bit hash functions
+ *           l_int32    l_hashStringToUint64()
+ *           l_int32    l_hashStringToUint64Fast()
+ *           l_int32    l_hashPtToUint64()
+ *           l_int32    l_hashFloat64ToUint64()
+ *
+ *       Prime finders
+ *           l_int32    findNextLargerPrime()
+ *           l_int32    lept_isPrime()
+ *
+ *       Gray code conversion
+ *           l_uint32   convertIntToGrayCode()
+ *           l_uint32   convertGrayCodeToInt()
+ *
+ *       Leptonica version number
+ *           char      *getLeptonicaVersion()
+ *
+ *       Timing
+ *           void       startTimer()
+ *           l_float32  stopTimer()
+ *           L_TIMER    startTimerNested()
+ *           l_float32  stopTimerNested()
+ *           void       l_getCurrentTime()
+ *           L_WALLTIMER  *startWallTimer()
+ *           l_float32  stopWallTimer()
+ *           void       l_getFormattedDate()
+ *
+ *  For all issues with cross-platform development, see utils2.c.
+ * </pre>
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config_auto.h>
+#endif  /* HAVE_CONFIG_H */
+
+#ifdef _WIN32
+#include <windows.h>
+#endif  /* _WIN32 */
+
+#include <time.h>
+#include "allheaders.h"
+#include <math.h>
+
+    /* Global for controlling message output at runtime */
+LEPT_DLL l_int32  LeptMsgSeverity = DEFAULT_SEVERITY;
+
+#define  DEBUG_SEV     0
+
+/*----------------------------------------------------------------------*
+ *                Control of error, warning and info messages           *
+ *----------------------------------------------------------------------*/
+/*!
+ * \brief   setMsgSeverity()
+ *
+ * \param[in]    newsev
+ * \return  oldsev
+ *
+ * <pre>
+ * Notes:
+ *      (1) setMsgSeverity() allows the user to specify the desired
+ *          message severity threshold.  Messages of equal or greater
+ *          severity will be output.  The previous message severity is
+ *          returned when the new severity is set.
+ *      (2) If L_SEVERITY_EXTERNAL is passed, then the severity will be
+ *          obtained from the LEPT_MSG_SEVERITY environment variable.
+ * </pre>
+ */
+l_int32
+setMsgSeverity(l_int32  newsev)
+{
+l_int32  oldsev;
+char    *envsev;
+
+    oldsev = LeptMsgSeverity;
+    if (newsev == L_SEVERITY_EXTERNAL) {
+        envsev = getenv("LEPT_MSG_SEVERITY");
+        if (envsev) {
+            LeptMsgSeverity = atoi(envsev);
+#if DEBUG_SEV
+            L_INFO("message severity set to external\n", "setMsgSeverity");
+#endif  /* DEBUG_SEV */
+        } else {
+#if DEBUG_SEV
+            L_WARNING("environment var LEPT_MSG_SEVERITY not defined\n",
+                      "setMsgSeverity");
+#endif  /* DEBUG_SEV */
+        }
+    } else {
+        LeptMsgSeverity = newsev;
+#if DEBUG_SEV
+        L_INFO("message severity set to %d\n", "setMsgSeverity", newsev);
+#endif  /* DEBUG_SEV */
+    }
+
+    return oldsev;
+}
+
+
+/*----------------------------------------------------------------------*
+ *                Error return functions, invoked by macros             *
+ *----------------------------------------------------------------------*
+ *                                                                      *
+ *    (1) These error functions print messages to stderr and allow      *
+ *        exit from the function that called them.                      *
+ *    (2) They must be invoked only by the three argument macros        *
+ *           ERROR_INT, ERROR_FLOAT, ERROR_PTR                          *
+ *        or the four argument macros                                   *
+ *           ERROR_INT_1, ERROR_FLOAT_1, ERROR_PTR_1                    *
+ *        which are in environ.h.                                       *
+ *    (3) The print output can be disabled at compile time, either      *
+ *        by using -DNO_CONSOLE_IO or by setting LeptMsgSeverity.       *
+ *----------------------------------------------------------------------*/
+/*!
+ * \brief   returnErrorInt()
+ *
+ * \param[in]    msg        error message
+ * \param[in]    procname   use __func__
+ * \param[in]    ival       return error val (typically 1 for an error)  
+ * \return  ival
+ */
+l_int32
+returnErrorInt(const char  *msg,
+               const char  *procname,
+               l_int32      ival)
+{
+    lept_stderr("Error in %s: %s\n", procname, msg);
+    return ival;
+}
+
+
+/*!
+ * \brief   returnErrorFloat()
+ *
+ * \param[in]    msg        error message
+ * \param[in]    procname   use __func__
+ * \param[in]    fval       return float error val
+ * \return  fval
+ */
+l_float32
+returnErrorFloat(const char  *msg,
+                 const char  *procname,
+                 l_float32    fval)
+{
+    lept_stderr("Error in %s: %s\n", procname, msg);
+    return fval;
+}
+
+
+/*!
+ * \brief   returnErrorPtr()
+ *
+ * \param[in]    msg        error message
+ * \param[in]    procname   use __func__
+ * \param[in]    pval       return error val (typically null for an error)
+ * \return  pval
+ */
+void *
+returnErrorPtr(const char  *msg,
+               const char  *procname,
+               void        *pval)
+{
+    lept_stderr("Error in %s: %s\n", procname, msg);
+    return pval;
+}
+
+
+/*!
+ * \brief   returnErrorInt1()
+ *
+ * \param[in]    msg        error message
+ * \param[in]    arg        additional error message argument
+ *                          (will be appended to the error message)
+ * \param[in]    procname   use __func__
+ * \param[in]    ival       return error val; typically 1 for an error return
+ * \return  ival   typically 1 for an error return
+ */
+l_int32
+returnErrorInt1(const char  *msg,
+                const char  *arg,
+                const char  *procname,
+                l_int32      ival)
+{
+    lept_stderr("Leptonica Error in %s: %s: %s\n", procname, msg, arg);
+    return ival;
+}
+
+
+/*!
+ * \brief   returnErrorFloat1()
+ *
+ * \param[in]    msg        error message
+ * \param[in]    arg        additional error message argument
+ *                          (will be appended to the error message)
+ * \param[in]    procname   use __func__
+ * \param[in]    fval       return float error val
+ * \return  fval
+ */
+l_float32
+returnErrorFloat1(const char  *msg,
+                  const char  *arg,
+                  const char  *procname,
+                  l_float32    fval)
+{
+    lept_stderr("Leptonica Error in %s: %s: %s\n", procname, msg, arg);
+    return fval;
+}
+
+
+/*!
+ * \brief   returnErrorPtr1()
+ *
+ * \param[in]    msg        error message
+ * \param[in]    arg        additional error message argument
+ *                          (will be appended to the error message)
+ * \param[in]    procname   use __func__
+ * \param[in]    pval       return error val (typically null for an error)
+ * \return  pval
+ */
+void *
+returnErrorPtr1(const char  *msg,
+                const char  *arg,
+                const char  *procname,
+                void        *pval)
+{
+    lept_stderr("Leptonica Error in %s: %s: %s\n", procname, msg, arg);
+    return pval;
+}
+
+
+/*------------------------------------------------------------------------*
+ *                   Runtime redirection of stderr                        *
+ *------------------------------------------------------------------------*
+ *                                                                        *
+ *  The user can provide a callback function to redirect messages         *
+ *  that would otherwise go to stderr.  Here are two examples:            *
+ *  (1) to stop all messages:                                             *
+ *      void send_to_devnull(const char *msg) {}                          *
+ *  (2) to write to the system logger:                                    *
+ *      void send_to_syslog(const char *msg) {                            *
+ *           syslog(1, msg);                                              *
+ *      }                                                                 *
+ *  These would then be registered using                                  *
+ *      leptSetStderrHandler(send_to_devnull);                            *
+ *  and                                                                   *
+ *      leptSetStderrHandler(send_to_syslog);                             *
+ *------------------------------------------------------------------------*/
+    /* By default, all messages go to stderr */
+static void lept_default_stderr_handler(const char *formatted_msg)
+{
+    if (formatted_msg)
+        fputs(formatted_msg, stderr);
+}
+
+    /* The stderr callback handler is private to leptonica.
+     * By default it writes to stderr.  */
+void (*stderr_handler)(const char *) = lept_default_stderr_handler;
+
+
+/*!
+ * \brief   leptSetStderrHandler()
+ *
+ * \param[in]    handler   callback function for lept_stderr output
+ * \return  void
+ *
+ * <pre>
+ * Notes:
+ *      (1) This registers a handler for redirection of output to stderr
+ *          at runtime.
+ *      (2) If called with NULL, the output goes to stderr.
+ * </pre>
+ */
+void leptSetStderrHandler(void (*handler)(const char *))
+{
+    if (handler)
+        stderr_handler = handler;
+    else
+        stderr_handler = lept_default_stderr_handler;
+}
+
+
+#define MAX_DEBUG_MESSAGE   2000
+/*!
+ * \brief   lept_stderr()
+ *
+ * \param[in]    fmt      format string
+ * \param[in]    ...      varargs
+ * \return  void
+ *
+ * <pre>
+ * Notes:
+ *      (1) This is a replacement for fprintf(), to allow redirection
+ *          of output.  All calls to fprintf(stderr, ...) are replaced
+ *          with calls to lept_stderr(...).
+ *      (2) The message size is limited to 2K bytes.
+        (3) This utility was provided by jbarlow83.
+ * </pre>
+ */
+void lept_stderr(const char *fmt, ...)
+{
+va_list  args;
+char     msg[MAX_DEBUG_MESSAGE];
+l_int32  n;
+
+    va_start(args, fmt);
+    n = vsnprintf(msg, sizeof(msg), fmt, args);
+    va_end(args);
+    if (n < 0)
+        return;
+    (*stderr_handler)(msg);
+}
+
+
+/*--------------------------------------------------------------------*
+ *                    Test files for equivalence                      *
+ *--------------------------------------------------------------------*/
+/*!
+ * \brief   filesAreIdentical()
+ *
+ * \param[in]    fname1
+ * \param[in]    fname2
+ * \param[out]   psame     1 if identical; 0 if different
+ * \return  0 if OK, 1 on error
+ */
+l_ok
+filesAreIdentical(const char  *fname1,
+                  const char  *fname2,
+                  l_int32     *psame)
+{
+l_int32   i, same;
+size_t    nbytes1, nbytes2;
+l_uint8  *array1, *array2;
+
+    if (!psame)
+        return ERROR_INT("&same not defined", __func__, 1);
+    *psame = 0;
+    if (!fname1 || !fname2)
+        return ERROR_INT("both names not defined", __func__, 1);
+
+    nbytes1 = nbytesInFile(fname1);
+    nbytes2 = nbytesInFile(fname2);
+    if (nbytes1 != nbytes2)
+        return 0;
+
+    if ((array1 = l_binaryRead(fname1, &nbytes1)) == NULL)
+        return ERROR_INT("array1 not read", __func__, 1);
+    if ((array2 = l_binaryRead(fname2, &nbytes2)) == NULL) {
+        LEPT_FREE(array1);
+        return ERROR_INT("array2 not read", __func__, 1);
+    }
+    same = 1;
+    for (i = 0; i < nbytes1; i++) {
+        if (array1[i] != array2[i]) {
+            same = 0;
+            break;
+        }
+    }
+    LEPT_FREE(array1);
+    LEPT_FREE(array2);
+    *psame = same;
+
+    return 0;
+}
+
+
+/*--------------------------------------------------------------------------*
+ *   16 and 32 bit byte-swapping on big endian and little  endian machines  *
+ *--------------------------------------------------------------------------*
+ *                                                                          *
+ *   These are typically used for I/O conversions:                          *
+ *      (1) endian conversion for data that was read from a file            *
+ *      (2) endian conversion on data before it is written to a file        *
+ *--------------------------------------------------------------------------*/
+
+/*--------------------------------------------------------------------*
+ *                        16-bit byte swapping                        *
+ *--------------------------------------------------------------------*/
+#ifdef L_BIG_ENDIAN
+
+l_uint16
+convertOnBigEnd16(l_uint16  shortin)
+{
+    return ((shortin << 8) | (shortin >> 8));
+}
+
+l_uint16
+convertOnLittleEnd16(l_uint16  shortin)
+{
+    return  shortin;
+}
+
+#else     /* L_LITTLE_ENDIAN */
+
+l_uint16
+convertOnLittleEnd16(l_uint16  shortin)
+{
+    return ((shortin << 8) | (shortin >> 8));
+}
+
+l_uint16
+convertOnBigEnd16(l_uint16  shortin)
+{
+    return  shortin;
+}
+
+#endif  /* L_BIG_ENDIAN */
+
+
+/*--------------------------------------------------------------------*
+ *                        32-bit byte swapping                        *
+ *--------------------------------------------------------------------*/
+#ifdef L_BIG_ENDIAN
+
+l_uint32
+convertOnBigEnd32(l_uint32  wordin)
+{
+    return ((wordin << 24) | ((wordin << 8) & 0x00ff0000) |
+            ((wordin >> 8) & 0x0000ff00) | (wordin >> 24));
+}
+
+l_uint32
+convertOnLittleEnd32(l_uint32  wordin)
+{
+    return wordin;
+}
+
+#else  /*  L_LITTLE_ENDIAN */
+
+l_uint32
+convertOnLittleEnd32(l_uint32  wordin)
+{
+    return ((wordin << 24) | ((wordin << 8) & 0x00ff0000) |
+            ((wordin >> 8) & 0x0000ff00) | (wordin >> 24));
+}
+
+l_uint32
+convertOnBigEnd32(l_uint32  wordin)
+{
+    return wordin;
+}
+
+#endif  /* L_BIG_ENDIAN */
+
+
+/*---------------------------------------------------------------------*
+ *           File corruption and byte replacement operations           *
+ *---------------------------------------------------------------------*/
+/*!
+ * \brief   fileCorruptByDeletion()
+ *
+ * \param[in]    filein
+ * \param[in]    loc       fractional location of start of deletion
+ * \param[in]    size      fractional size of deletion
+ * \param[in]    fileout   corrupted file
+ * \return  0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) %loc and %size are expressed as a fraction of the file size.
+ *      (2) This makes a copy of the data in %filein, where bytes in the
+ *          specified region have deleted.
+ *      (3) If (%loc + %size) >= 1.0, this deletes from the position
+ *          represented by %loc to the end of the file.
+ *      (4) It is useful for testing robustness of I/O wrappers when the
+ *          data is corrupted, by simulating data corruption by deletion.
+ * </pre>
+ */
+l_ok
+fileCorruptByDeletion(const char  *filein,
+                      l_float32    loc,
+                      l_float32    size,
+                      const char  *fileout)
+{
+l_int32   i, locb, sizeb, rembytes;
+size_t    inbytes, outbytes;
+l_uint8  *datain, *dataout;
+
+    if (!filein || !fileout)
+        return ERROR_INT("filein and fileout not both specified", __func__, 1);
+    if (loc < 0.0 || loc >= 1.0)
+        return ERROR_INT("loc must be in [0.0 ... 1.0)", __func__, 1);
+    if (size <= 0.0)
+        return ERROR_INT("size must be > 0.0", __func__, 1);
+    if (loc + size > 1.0)
+        size = 1.0f - loc;
+
+    datain = l_binaryRead(filein, &inbytes);
+    locb = (l_int32)(loc * inbytes + 0.5);
+    locb = L_MIN(locb, inbytes - 1);
+    sizeb = (l_int32)(size * inbytes + 0.5);
+    sizeb = L_MAX(1, sizeb);
+    sizeb = L_MIN(sizeb, inbytes - locb);  /* >= 1 */
+    L_INFO("Removed %d bytes at location %d\n", __func__, sizeb, locb);
+    rembytes = inbytes - locb - sizeb;  /* >= 0; to be copied, after excision */
+
+    outbytes = inbytes - sizeb;
+    dataout = (l_uint8 *)LEPT_CALLOC(outbytes, 1);
+    for (i = 0; i < locb; i++)
+        dataout[i] = datain[i];
+    for (i = 0; i < rembytes; i++)
+        dataout[locb + i] = datain[locb + sizeb + i];
+    l_binaryWrite(fileout, "w", dataout, outbytes);
+
+    LEPT_FREE(datain);
+    LEPT_FREE(dataout);
+    return 0;
+}
+
+
+/*!
+ * \brief   fileCorruptByMutation()
+ *
+ * \param[in]    filein
+ * \param[in]    loc       fractional location of start of randomization
+ * \param[in]    size      fractional size of randomization
+ * \param[in]    fileout   corrupted file
+ * \return  0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) %loc and %size are expressed as a fraction of the file size.
+ *      (2) This makes a copy of the data in %filein, where bytes in the
+ *          specified region have been replaced by random data.
+ *      (3) If (%loc + %size) >= 1.0, this modifies data from the position
+ *          represented by %loc to the end of the file.
+ *      (4) It is useful for testing robustness of I/O wrappers when the
+ *          data is corrupted, by simulating data corruption.
+ * </pre>
+ */
+l_ok
+fileCorruptByMutation(const char  *filein,
+                      l_float32    loc,
+                      l_float32    size,
+                      const char  *fileout)
+{
+l_int32   i, locb, sizeb;
+size_t    bytes;
+l_uint8  *data;
+
+    if (!filein || !fileout)
+        return ERROR_INT("filein and fileout not both specified", __func__, 1);
+    if (loc < 0.0 || loc >= 1.0)
+        return ERROR_INT("loc must be in [0.0 ... 1.0)", __func__, 1);
+    if (size <= 0.0)
+        return ERROR_INT("size must be > 0.0", __func__, 1);
+    if (loc + size > 1.0)
+        size = 1.0f - loc;
+
+    data = l_binaryRead(filein, &bytes);
+    locb = (l_int32)(loc * bytes + 0.5);
+    locb = L_MIN(locb, bytes - 1);
+    sizeb = (l_int32)(size * bytes + 0.5);
+    sizeb = L_MAX(1, sizeb);
+    sizeb = L_MIN(sizeb, bytes - locb);  /* >= 1 */
+    L_INFO("Randomizing %d bytes at location %d\n", __func__, sizeb, locb);
+
+        /* Make an array of random bytes and do the substitution */
+    for (i = 0; i < sizeb; i++) {
+        data[locb + i] =
+            (l_uint8)(255.9 * ((l_float64)rand() / (l_float64)RAND_MAX));
+    }
+
+    l_binaryWrite(fileout, "w", data, bytes);
+    LEPT_FREE(data);
+    return 0;
+}
+
+
+/*!
+ * \brief   fileReplaceBytes()
+ *
+ * \param[in]    filein      input file
+ * \param[in]    start       start location for replacement
+ * \param[in]    nbytes      number of bytes to be removed
+ * \param[in]    newdata     replacement bytes
+ * \param[in]    newsize     size of replacement bytes
+ * \param[in]    fileout     output file
+ * \return  0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) To remove %nbytes without replacement, set %newdata == NULL.
+ *      (2) One use is for replacing the date/time in a pdf file by a
+ *          string of 12 '0's, effectively removing the date without
+ *          invalidating the byte counters in the pdf file:
+ *              fileReplaceBytes(filein 86 12 (char *)"000000000000" 12 fileout
+ * </pre>
+ */
+l_ok
+fileReplaceBytes(const char  *filein,
+                 l_int32      start,
+                 l_int32      nbytes,
+                 l_uint8     *newdata,
+                 size_t       newsize,
+                 const char  *fileout)
+{
+l_int32   i, index;
+size_t    inbytes, outbytes;
+l_uint8  *datain, *dataout;
+
+    if (!filein || !fileout)
+        return ERROR_INT("filein and fileout not both specified", __func__, 1);
+
+    datain = l_binaryRead(filein, &inbytes);
+    if (start + nbytes > inbytes)
+        L_WARNING("start + nbytes > length(filein) = %zu\n", __func__, inbytes);
+
+    if (!newdata) newsize = 0;
+    outbytes = inbytes - nbytes + newsize;
+    if ((dataout = (l_uint8 *)LEPT_CALLOC(outbytes, 1)) == NULL) {
+        LEPT_FREE(datain);
+        return ERROR_INT("calloc fail for dataout", __func__, 1);
+    }
+
+    for (i = 0; i < start; i++)
+        dataout[i] = datain[i];
+    for (i = start; i < start + newsize; i++)
+        dataout[i] = newdata[i - start];
+    index = start + nbytes;  /* for datain */
+    start += newsize;  /* for dataout */
+    for (i = start; i < outbytes; i++, index++)
+        dataout[i] = datain[index];
+    l_binaryWrite(fileout, "w", dataout, outbytes);
+
+    LEPT_FREE(datain);
+    LEPT_FREE(dataout);
+    return 0;
+}
+
+
+/*---------------------------------------------------------------------*
+ *              Generate random integer in given interval              *
+ *---------------------------------------------------------------------*/
+/*!
+ * \brief   genRandomIntOnInterval()
+ *
+ * \param[in]    start     beginning of interval; can be < 0
+ * \param[in]    end       end of interval; must be >= start
+ * \param[in]    seed      use 0 to skip; otherwise call srand
+ * \param[out]   pval      random integer in interval [start ... end]
+ * \return  0 if OK, 1 on error
+ */
+l_ok
+genRandomIntOnInterval(l_int32   start,
+                       l_int32   end,
+                       l_int32   seed,
+                       l_int32  *pval)
+{
+l_float64  range;
+
+    if (!pval)
+        return ERROR_INT("&val not defined", __func__, 1);
+    *pval = 0;
+    if (end < start)
+        return ERROR_INT("invalid range", __func__, 1);
+
+    if (seed > 0) srand(seed);
+    range = (l_float64)(end - start + 1);
+    *pval = start + (l_int32)((l_float64)range *
+                       ((l_float64)rand() / (l_float64)RAND_MAX));
+    return 0;
+}
+
+
+/*---------------------------------------------------------------------*
+ *                        Simple math functions                        *
+ *---------------------------------------------------------------------*/
+/*!
+ * \brief   lept_roundftoi()
+ *
+ * \param[in]    fval
+ * \return  value rounded to the nearest integer
+ *
+ * <pre>
+ * Notes:
+ *      (1) For fval >= 0, fval --> round(fval) == floor(fval + 0.5)
+ *          For fval < 0, fval --> -round(-fval))
+ *          This is symmetric around 0.
+ *          e.g., for fval in (-0.5 ... 0.5), fval --> 0
+ * </pre>
+ */
+l_int32
+lept_roundftoi(l_float32  fval)
+{
+    return (fval >= 0.0) ? (l_int32)(fval + 0.5) : (l_int32)(fval - 0.5);
+}
+
+
+/*!
+ * \brief   lept_floor()
+ *
+ * \param[in]    fval
+ * \return  largest integer that is not greater than %fval
+ */
+l_int32
+lept_floor(l_float32  fval)
+{
+    return (fval >= 0.0) ? (l_int32)(fval) : -(l_int32)(-fval);
+}
+
+
+/*!
+ * \brief   lept_ceiling()
+ *
+ * \param[in]    fval
+ * \return  smallest integer that is not less than %fval
+ *
+ * <pre>
+ * Notes:
+ *      (1) If fval is equal to its interger value, return that.
+ *          Otherwise:
+ *            For fval > 0, fval --> 1 + floor(fval)
+ *            For fval < 0, fval --> -(1 + floor(-fval))
+ * </pre>
+ */
+l_int32
+lept_ceiling(l_float32  fval)
+{
+    return (fval == (l_int32)fval ? (l_int32)fval :
+            fval > 0.0 ? 1 + (l_int32)(fval) : -(1 + (l_int32)(-fval)));
+}
+
+
+/*---------------------------------------------------------------------*
+ *                        64-bit hash functions                        *
+ *---------------------------------------------------------------------*/
+/*!
+ * \brief   l_hashStringToUint64()
+ *
+ * \param[in]    str
+ * \param[out]   phash    hash value
+ * \return  0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) The intent of the hash is to avoid collisions by mapping
+ *          the string as randomly as possible into 64 bits.
+ *      (2) To the extent that the hashes are random, the probability of
+ *          a collision can be approximated by the square of the number
+ *          of strings divided by 2^64.  For 1 million strings, the
+ *          collision probability is about 1 in 16 million.
+ *      (3) I expect non-randomness of the distribution to be most evident
+ *          for small text strings.  This hash function has been tested
+ *          for all 5-character text strings composed of 26 letters,
+ *          of which there are 26^5 = 12356630.  There are no hash
+ *          collisions for this set.
+ * </pre>
+ */
+l_ok
+l_hashStringToUint64(const char  *str,
+                     l_uint64    *phash)
+{
+l_uint64  hash, mulp;
+
+    if (phash) *phash = 0;
+    if (!str || (str[0] == '\0'))
+        return ERROR_INT("str not defined or empty", __func__, 1);
+    if (!phash)
+        return ERROR_INT("&hash not defined", __func__, 1);
+
+    mulp = 26544357894361247;  /* prime, about 1/700 of the max uint64 */
+    hash = 104395301;
+    while (*str) {
+        hash += (*str++ * mulp) ^ (hash >> 7);   /* shift [1...23] are ok */
+    }
+    *phash = hash ^ (hash << 37);
+    return 0;
+}
+
+
+/*!
+ * \brief   l_hashStringToUint64Fast()
+ *
+ * \param[in]    str
+ * \param[out]   phash    hash value
+ * \return  0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *     (1) This very simple hash algorithm is described in "The Practice
+ *         of Programming" by Kernighan and Pike, p. 57 (1999).
+ *     (2) The returned hash value would then be hashed into an index into
+ *         the hashtable, using the mod operator with the hashtable size.
+ * </pre>
+ */
+l_ok
+l_hashStringToUint64Fast(const char  *str,
+                         l_uint64    *phash)
+{
+l_uint64  h;
+l_uint8  *p;
+
+    if (phash) *phash = 0;
+    if (!str || (str[0] == '\0'))
+        return ERROR_INT("str not defined or empty", __func__, 1);
+    if (!phash)
+        return ERROR_INT("&hash not defined", __func__, 1);
+
+    h = 0;
+    for (p = (l_uint8 *)str; *p != '\0'; p++)
+        h = 37 * h + *p;  /* 37 is good prime number for this */
+    *phash = h;
+    return 0;
+}
+
+
+/*!
+ * \brief   l_hashPtToUint64()
+ *
+ * \param[in]    x, y
+ * \param[out]   phash    hash value
+ * \return  0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) This simple hash function has no collisions for
+ *          any of 400 million points with x and y up to 20000.
+ * </pre>
+ */
+l_ok
+l_hashPtToUint64(l_int32    x,
+                 l_int32    y,
+                 l_uint64  *phash)
+{
+    if (!phash)
+        return ERROR_INT("&hash not defined", __func__, 1);
+
+    *phash = (l_uint64)(2173249142.3849 * x + 3763193258.6227 * y);
+    return 0;
+}
+
+
+/*!
+ * \brief   l_hashFloat64ToUint64()
+ *
+ * \param[in]    val
+ * \param[out]   phash      hash key
+ * \return  0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) This is a simple hash for using hashmaps with 64-bit float data.
+ *      (2) The resulting hash is called a "key" in a lookup operation.
+ *          The bucket for %val in a hashmap is then found by taking the mod
+ *          of the hash key with the number of buckets (which is prime).
+ * </pre>
+ */
+l_ok
+l_hashFloat64ToUint64(l_float64  val,
+                      l_uint64  *phash)
+{
+    if (!phash)
+        return ERROR_INT("&hash not defined", __func__, 1);
+    val = (val >= 0.0) ? 847019.66701 * val : -217324.91613 * val;
+    *phash = (l_uint64)val;
+    return 0;
+}
+
+
+/*---------------------------------------------------------------------*
+ *                           Prime finders                             *
+ *---------------------------------------------------------------------*/
+/*!
+ * \brief   findNextLargerPrime()
+ *
+ * \param[in]    start
+ * \param[out]   pprime    first prime larger than %start
+ * \return  0 if OK, 1 on error
+ */
+l_ok
+findNextLargerPrime(l_int32    start,
+                    l_uint32  *pprime)
+{
+l_int32  i, is_prime;
+
+    if (!pprime)
+        return ERROR_INT("&prime not defined", __func__, 1);
+    *pprime = 0;
+    if (start <= 0)
+        return ERROR_INT("start must be > 0", __func__, 1);
+
+    for (i = start + 1; ; i++) {
+        lept_isPrime(i, &is_prime, NULL);
+        if (is_prime) {
+            *pprime = i;
+            return 0;
+        }
+    }
+
+    return ERROR_INT("prime not found!", __func__, 1);
+}
+
+
+/*!
+ * \brief   lept_isPrime()
+ *
+ * \param[in]    n           64-bit unsigned
+ * \param[out]   pis_prime   1 if prime, 0 otherwise
+ * \param[out]   pfactor     [optional] smallest divisor, or 0 on error
+ *                           or if prime
+ * \return  0 if OK, 1 on error
+ */
+l_ok
+lept_isPrime(l_uint64   n,
+             l_int32   *pis_prime,
+             l_uint32  *pfactor)
+{
+l_uint32  div;
+l_uint64  limit, ratio;
+
+    if (pis_prime) *pis_prime = 0;
+    if (pfactor) *pfactor = 0;
+    if (!pis_prime)
+        return ERROR_INT("&is_prime not defined", __func__, 1);
+    if (n <= 0)
+        return ERROR_INT("n must be > 0", __func__, 1);
+
+    if (n % 2 == 0) {
+        if (pfactor) *pfactor = 2;
+        return 0;
+    }
+
+    limit = (l_uint64)sqrt((l_float64)n);
+    for (div = 3; div < limit; div += 2) {
+       ratio = n / div;
+       if (ratio * div == n) {
+           if (pfactor) *pfactor = div;
+           return 0;
+       }
+    }
+
+    *pis_prime = 1;
+    return 0;
+}
+
+
+/*---------------------------------------------------------------------*
+ *                         Gray code conversion                        *
+ *---------------------------------------------------------------------*/
+/*!
+ * \brief   convertIntToGrayCode()
+ *
+ * \param[in]  val    integer value
+ * \return     corresponding gray code value
+ *
+ * <pre>
+ * Notes:
+ *      (1) Gray code values corresponding to integers differ by
+ *          only one bit transition between successive integers.
+ * </pre>
+ */
+l_uint32
+convertIntToGrayCode(l_uint32 val)
+{
+    return (val >> 1) ^ val;
+}
+
+
+/*!
+ * \brief   convertGrayCodeToInt()
+ *
+ * \param[in]  val    gray code value
+ * \return     corresponding integer value
+ */
+l_uint32
+convertGrayCodeToInt(l_uint32 val)
+{
+l_uint32  shift;
+
+    for (shift = 1; shift < 32; shift <<= 1)
+        val ^= val >> shift;
+    return val;
+}
+
+
+/*---------------------------------------------------------------------*
+ *                       Leptonica version number                      *
+ *---------------------------------------------------------------------*/
+/*!
+ * \brief   getLeptonicaVersion()
+ *
+ *      Return: string of version number (e.g., 'leptonica-1.74.2')
+ *
+ *  Notes:
+ *      (1) The caller has responsibility to free the memory.
+ */
+char *
+getLeptonicaVersion(void)
+{
+size_t  bufsize = 100;
+
+    char *version = (char *)LEPT_CALLOC(bufsize, sizeof(char));
+
+#ifdef _MSC_VER
+  #ifdef _USRDLL
+    char dllStr[] = "DLL";
+  #else
+    char dllStr[] = "LIB";
+  #endif
+  #ifdef _DEBUG
+    char debugStr[] = "Debug";
+  #else
+    char debugStr[] = "Release";
+  #endif
+  #ifdef _M_IX86
+    char bitStr[] = " x86";
+  #elif _M_X64
+    char bitStr[] = " x64";
+  #else
+    char bitStr[] = "";
+  #endif
+    snprintf(version, bufsize, "leptonica-%d.%d.%d (%s, %s) [MSC v.%d %s %s%s]",
+           LIBLEPT_MAJOR_VERSION, LIBLEPT_MINOR_VERSION, LIBLEPT_PATCH_VERSION,
+           __DATE__, __TIME__, _MSC_VER, dllStr, debugStr, bitStr);
+
+#else
+
+    snprintf(version, bufsize, "leptonica-%d.%d.%d", LIBLEPT_MAJOR_VERSION,
+             LIBLEPT_MINOR_VERSION, LIBLEPT_PATCH_VERSION);
+
+#endif   /* _MSC_VER */
+    return version;
+}
+
+
+/*---------------------------------------------------------------------*
+ *                           Timing procs                              *
+ *---------------------------------------------------------------------*/
+#if !defined(_WIN32) && !defined(__Fuchsia__)
+
+#include <sys/time.h>
+#include <sys/resource.h>
+
+static struct rusage rusage_before;
+static struct rusage rusage_after;
+
+/*!
+ * \brief   startTimer(), stopTimer()
+ *
+ *  Notes:
+ *      (1) These measure the cpu time elapsed between the two calls:
+ *            startTimer();
+ *            ....
+ *            lept_stderr( "Elapsed time = %7.3f sec\n", stopTimer());
+ */
+void
+startTimer(void)
+{
+    getrusage(RUSAGE_SELF, &rusage_before);
+}
+
+l_float32
+stopTimer(void)
+{
+l_int32  tsec, tusec;
+
+    getrusage(RUSAGE_SELF, &rusage_after);
+
+    tsec = rusage_after.ru_utime.tv_sec - rusage_before.ru_utime.tv_sec;
+    tusec = rusage_after.ru_utime.tv_usec - rusage_before.ru_utime.tv_usec;
+    return (tsec + ((l_float32)tusec) / 1000000.0);
+}
+
+
+/*!
+ * \brief   startTimerNested(), stopTimerNested()
+ *
+ *  Example of usage:
+ *
+ *      L_TIMER  t1 = startTimerNested();
+ *      ....
+ *      L_TIMER  t2 = startTimerNested();
+ *      ....
+ *      lept_stderr( "Elapsed time 2 = %7.3f sec\n", stopTimerNested(t2));
+ *      ....
+ *      lept_stderr( "Elapsed time 1 = %7.3f sec\n", stopTimerNested(t1));
+ */
+L_TIMER
+startTimerNested(void)
+{
+struct rusage  *rusage_start;
+
+    rusage_start = (struct rusage *)LEPT_CALLOC(1, sizeof(struct rusage));
+    getrusage(RUSAGE_SELF, rusage_start);
+    return rusage_start;
+}
+
+l_float32
+stopTimerNested(L_TIMER  rusage_start)
+{
+l_int32        tsec, tusec;
+struct rusage  rusage_stop;
+
+    getrusage(RUSAGE_SELF, &rusage_stop);
+
+    tsec = rusage_stop.ru_utime.tv_sec -
+           ((struct rusage *)rusage_start)->ru_utime.tv_sec;
+    tusec = rusage_stop.ru_utime.tv_usec -
+           ((struct rusage *)rusage_start)->ru_utime.tv_usec;
+    LEPT_FREE(rusage_start);
+    return (tsec + ((l_float32)tusec) / 1000000.0);
+}
+
+
+/*!
+ * \brief   l_getCurrentTime()
+ *
+ * \param[out]   sec     [optional] in seconds since birth of Unix
+ * \param[out]   usec    [optional] in microseconds since birth of Unix
+ * \return  void
+ */
+void
+l_getCurrentTime(l_int32  *sec,
+                 l_int32  *usec)
+{
+struct timeval tv;
+
+    gettimeofday(&tv, NULL);
+    if (sec) *sec = (l_int32)tv.tv_sec;
+    if (usec) *usec = (l_int32)tv.tv_usec;
+}
+
+#elif defined(__Fuchsia__) /* resource.h not implemented on Fuchsia. */
+
+    /* Timer functions are used for testing and debugging, and
+     * are stubbed out.  If they are needed in the future, they
+     * can be implemented in Fuchsia using the zircon syscall
+     * zx_object_get_info() in ZX_INFOR_THREAD_STATS mode.  */
+void
+startTimer(void)
+{
+}
+
+l_float32
+stopTimer(void)
+{
+    return 0.0;
+}
+
+L_TIMER
+startTimerNested(void)
+{
+    return NULL;
+}
+
+l_float32
+stopTimerNested(L_TIMER  rusage_start)
+{
+    return 0.0;
+}
+
+void
+l_getCurrentTime(l_int32  *sec,
+                 l_int32  *usec)
+{
+}
+
+#else   /* _WIN32 : resource.h not implemented under Windows */
+
+    /* Note: if division by 10^7 seems strange, the time is expressed
+     * as the number of 100-nanosecond intervals that have elapsed
+     * since 12:00 A.M. January 1, 1601.  */
+
+static ULARGE_INTEGER utime_before;
+static ULARGE_INTEGER utime_after;
+
+void
+startTimer(void)
+{
+HANDLE    this_process;
+FILETIME  start, stop, kernel, user;
+
+    this_process = GetCurrentProcess();
+
+    GetProcessTimes(this_process, &start, &stop, &kernel, &user);
+
+    utime_before.LowPart  = user.dwLowDateTime;
+    utime_before.HighPart = user.dwHighDateTime;
+}
+
+l_float32
+stopTimer(void)
+{
+HANDLE     this_process;
+FILETIME   start, stop, kernel, user;
+ULONGLONG  hnsec;  /* in units of hecto-nanosecond (100 ns) intervals */
+
+    this_process = GetCurrentProcess();
+
+    GetProcessTimes(this_process, &start, &stop, &kernel, &user);
+
+    utime_after.LowPart  = user.dwLowDateTime;
+    utime_after.HighPart = user.dwHighDateTime;
+    hnsec = utime_after.QuadPart - utime_before.QuadPart;
+    return (l_float32)(signed)hnsec / 10000000.0f;
+}
+
+L_TIMER
+startTimerNested(void)
+{
+HANDLE           this_process;
+FILETIME         start, stop, kernel, user;
+ULARGE_INTEGER  *utime_start;
+
+    this_process = GetCurrentProcess();
+
+    GetProcessTimes (this_process, &start, &stop, &kernel, &user);
+
+    utime_start = (ULARGE_INTEGER *)LEPT_CALLOC(1, sizeof(ULARGE_INTEGER));
+    utime_start->LowPart  = user.dwLowDateTime;
+    utime_start->HighPart = user.dwHighDateTime;
+    return utime_start;
+}
+
+l_float32
+stopTimerNested(L_TIMER  utime_start)
+{
+HANDLE          this_process;
+FILETIME        start, stop, kernel, user;
+ULARGE_INTEGER  utime_stop;
+ULONGLONG       hnsec;  /* in units of 100 ns intervals */
+
+    this_process = GetCurrentProcess ();
+
+    GetProcessTimes (this_process, &start, &stop, &kernel, &user);
+
+    utime_stop.LowPart  = user.dwLowDateTime;
+    utime_stop.HighPart = user.dwHighDateTime;
+    hnsec = utime_stop.QuadPart - ((ULARGE_INTEGER *)utime_start)->QuadPart;
+    LEPT_FREE(utime_start);
+    return (l_float32)(signed)hnsec / 10000000.0f;
+}
+
+void
+l_getCurrentTime(l_int32  *sec,
+                 l_int32  *usec)
+{
+ULARGE_INTEGER  utime, birthunix;
+FILETIME        systemtime;
+LONGLONG        birthunixhnsec = 116444736000000000;  /*in units of 100 ns */
+LONGLONG        usecs;
+
+    GetSystemTimeAsFileTime(&systemtime);
+    utime.LowPart  = systemtime.dwLowDateTime;
+    utime.HighPart = systemtime.dwHighDateTime;
+
+    birthunix.LowPart = (DWORD) birthunixhnsec;
+    birthunix.HighPart = birthunixhnsec >> 32;
+
+    usecs = (LONGLONG) ((utime.QuadPart - birthunix.QuadPart) / 10);
+
+    if (sec) *sec = (l_int32) (usecs / 1000000);
+    if (usec) *usec = (l_int32) (usecs % 1000000);
+}
+
+#endif
+
+
+/*!
+ * \brief   startWallTimer()
+ *
+ * \return  walltimer-ptr
+ *
+ * <pre>
+ * Notes:
+ *      (1) These measure the wall clock time  elapsed between the two calls:
+ *            L_WALLTIMER *timer = startWallTimer();
+ *            ....
+ *            lept_stderr( "Elapsed time = %f sec\n", stopWallTimer(&timer);
+ *      (2) Note that the timer object is destroyed by stopWallTimer().
+ * </pre>
+ */
+L_WALLTIMER *
+startWallTimer(void)
+{
+L_WALLTIMER  *timer;
+
+    timer = (L_WALLTIMER *)LEPT_CALLOC(1, sizeof(L_WALLTIMER));
+    l_getCurrentTime(&timer->start_sec, &timer->start_usec);
+    return timer;
+}
+
+/*!
+ * \brief   stopWallTimer()
+ *
+ * \param[in,out]  ptimer     walltimer pointer
+ * \return  time wall time elapsed in seconds
+ */
+l_float32
+stopWallTimer(L_WALLTIMER  **ptimer)
+{
+l_int32       tsec, tusec;
+L_WALLTIMER  *timer;
+
+    if (!ptimer)
+        return (l_float32)ERROR_FLOAT("&timer not defined", __func__, 0.0);
+    timer = *ptimer;
+    if (!timer)
+        return (l_float32)ERROR_FLOAT("timer not defined", __func__, 0.0);
+
+    l_getCurrentTime(&timer->stop_sec, &timer->stop_usec);
+    tsec = timer->stop_sec - timer->start_sec;
+    tusec = timer->stop_usec - timer->start_usec;
+    LEPT_FREE(timer);
+    *ptimer = NULL;
+    return (tsec + ((l_float32)tusec) / 1000000.0f);
+}
+
+
+/*!
+ * \brief   l_getFormattedDate()
+ *
+ * \return  formatted date string, or NULL on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) This is used in pdf, in the form specified in section 3.8.2 of
+ *          http://partners.adobe.com/public/developer/en/pdf/PDFReference.pdf
+ *      (2) Contributed by Dave Bryan.  Works on all platforms.
+ * </pre>
+ */
+char *
+l_getFormattedDate(void)
+{
+char        buf[128] = "", sep = 'Z';
+l_int32     gmt_offset, relh, relm;
+time_t      ut, lt;
+struct tm   Tm;
+struct tm  *tptr = &Tm;
+
+    ut = time(NULL);
+
+        /* This generates a second "time_t" value by calling "gmtime" to
+           fill in a "tm" structure expressed as UTC and then calling
+           "mktime", which expects a "tm" structure expressed as the
+           local time.  The result is a value that is offset from the
+           value returned by the "time" function by the local UTC offset.
+           "tm_isdst" is set to -1 to tell "mktime" to determine for
+           itself whether DST is in effect.  This is necessary because
+           "gmtime" always sets "tm_isdst" to 0, which would tell
+           "mktime" to presume that DST is not in effect. */
+#ifdef _WIN32
+  #ifdef _MSC_VER
+    gmtime_s(tptr, &ut);
+  #else  /* mingw */
+    tptr = gmtime(&ut);
+  #endif
+#else
+    gmtime_r(&ut, tptr);
+#endif
+    tptr->tm_isdst = -1;
+    lt = mktime(tptr);
+
+        /* Calls "difftime" to obtain the resulting difference in seconds,
+         * because "time_t" is an opaque type, per the C standard. */
+    gmt_offset = (l_int32) difftime(ut, lt);
+    if (gmt_offset > 0)
+        sep = '+';
+    else if (gmt_offset < 0)
+        sep = '-';
+    relh = L_ABS(gmt_offset) / 3600;
+    relm = (L_ABS(gmt_offset) % 3600) / 60;
+
+#ifdef _WIN32
+  #ifdef _MSC_VER
+    localtime_s(tptr, &ut);
+  #else  /* mingw */
+    tptr = localtime(&ut);
+  #endif
+#else
+    localtime_r(&ut, tptr);
+#endif
+    strftime(buf, sizeof(buf), "%Y%m%d%H%M%S", tptr);
+    sprintf(buf + 14, "%c%02d'%02d'", sep, relh, relm);
+    return stringNew(buf);
+}
author	Franz Glasner <fzglas.hg@dom66.de>
date	Mon, 15 Sep 2025 11:44:09 +0200
parents	b50eed0cc0ef
children