Mercurial > hgrepos > Python2 > PyMuPDF

diff mupdf-source/thirdparty/leptonica/src/utils2.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author: Franz Glasner <fzglas.hg@dom66.de>
date: Mon, 15 Sep 2025 11:43:07 +0200
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mupdf-source/thirdparty/leptonica/src/utils2.c	Mon Sep 15 11:43:07 2025 +0200
@@ -0,0 +1,3382 @@
+/*====================================================================*
+ -  Copyright (C) 2001 Leptonica.  All rights reserved.
+ -
+ -  Redistribution and use in source and binary forms, with or without
+ -  modification, are permitted provided that the following conditions
+ -  are met:
+ -  1. Redistributions of source code must retain the above copyright
+ -     notice, this list of conditions and the following disclaimer.
+ -  2. Redistributions in binary form must reproduce the above
+ -     copyright notice, this list of conditions and the following
+ -     disclaimer in the documentation and/or other materials
+ -     provided with the distribution.
+ -
+ -  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ -  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ -  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ -  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL ANY
+ -  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ -  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ -  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ -  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ -  OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ -  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ -  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *====================================================================*/
+
+/*!
+ * \file utils2.c
+ * <pre>
+ *
+ *      ------------------------------------------
+ *      This file has these utilities:
+ *         - safe string operations
+ *         - find/replace operations on strings
+ *         - read/write between file and memory
+ *         - multi-platform file and directory operations
+ *         - file name operations
+ *      ------------------------------------------
+ *
+ *       Safe string procs
+ *           char      *stringNew()
+ *           l_int32    stringCopy()
+ *           l_int32    stringCopySegment()
+ *           l_int32    stringReplace()
+ *           l_int32    stringLength()
+ *           l_int32    stringCat()
+ *           char      *stringConcatNew()
+ *           char      *stringJoin()
+ *           l_int32    stringJoinIP()
+ *           char      *stringReverse()
+ *           char      *strtokSafe()
+ *           l_int32    stringSplitOnToken()
+ *
+ *       Find and replace string and array procs
+ *           l_int32    stringCheckForChars()
+ *           char      *stringRemoveChars()
+ *           char      *stringReplaceEachSubstr()
+ *           char      *stringReplaceSubstr()
+ *           L_DNA     *stringFindEachSubstr()
+ *           l_int32    stringFindSubstr()
+ *           l_uint8   *arrayReplaceEachSequence()
+ *           L_DNA     *arrayFindEachSequence()
+ *           l_int32    arrayFindSequence()
+ *
+ *       Safe realloc
+ *           void      *reallocNew()
+ *
+ *       Read and write between file and memory
+ *           l_uint8   *l_binaryRead()
+ *           l_uint8   *l_binaryReadStream()
+ *           l_uint8   *l_binaryReadSelect()
+ *           l_uint8   *l_binaryReadSelectStream()
+ *           l_int32    l_binaryWrite()
+ *           l_int32    nbytesInFile()
+ *           l_int32    fnbytesInFile()
+ *
+ *       Copy and compare in memory
+ *           l_uint8   *l_binaryCopy()
+ *           l_uint8   *l_binaryCompare()
+ *
+ *       File copy operations
+ *           l_int32    fileCopy()
+ *           l_int32    fileConcatenate()
+ *           l_int32    fileAppendString()
+ *
+ *       File split operations
+ *           l_int32    fileSplitLinesUniform()
+ *
+ *       Multi-platform functions for opening file streams
+ *           FILE      *fopenReadStream()
+ *           FILE      *fopenWriteStream()
+ *           FILE      *fopenReadFromMemory()
+ *
+ *       Opening a Windows tmpfile for writing
+ *           FILE      *fopenWriteWinTempfile()
+ *
+ *       Multi-platform functions that avoid C-runtime boundary crossing
+ *       with Windows DLLs  (use in programs only)
+ *           FILE      *lept_fopen()
+ *           l_int32    lept_fclose()
+ *           void      *lept_calloc()
+ *           void       lept_free()
+ *
+ *       Multi-platform file system operations in temp directories
+ *           l_int32    lept_mkdir()
+ *           l_int32    lept_rmdir()
+ *           l_int32    lept_direxists()
+ *           l_int32    lept_mv()
+ *           l_int32    lept_rm_match()
+ *           l_int32    lept_rm()
+ *           l_int32    lept_rmfile()
+ *           l_int32    lept_cp()
+ *
+ *       Special debug/test function for calling 'system'
+ *           l_int32    callSystemDebug()
+ *
+ *       General file name operations
+ *           l_int32    splitPathAtDirectory()
+ *           l_int32    splitPathAtExtension()
+ *           char      *pathJoin()
+ *           char      *appendSubdirs()
+ *
+ *       Special file name operations
+ *           l_int32    convertSepCharsInPath()
+ *           char      *genPathname()
+ *           l_int32    makeTempDirname()
+ *           l_int32    modifyTrailingSlash()
+ *           char      *l_makeTempFilename()
+ *           l_int32    extractNumberFromFilename()
+ *
+ *
+ *  Notes on multi-platform development
+ *  -----------------------------------
+ *  This is important:
+ *  (1) With the exception of splitPathAtDirectory(), splitPathAtExtension()
+ *      and genPathname(), all input pathnames must have unix separators.
+ *  (2) On macOS, iOS and Windows, for read or write to "/tmp/..."
+ *      the filename is rewritten to use the OS specific temp directory:
+ *         /tmp  ==>   [Temp]/...
+ *  (3) This filename rewrite, along with the conversion from unix
+ *      to OS specific pathnames, happens in genPathname().
+ *  (4) Use fopenReadStream() and fopenWriteStream() to open files,
+ *      because these use genPathname() to find the platform-dependent
+ *      filenames.  Likewise for l_binaryRead() and l_binaryWrite().
+ *  (5) For moving, copying and removing files and directories that are in
+ *      subdirectories of /tmp, use the lept_*() file system shell wrappers:
+ *         lept_mkdir(), lept_rmdir(), lept_mv(), lept_rm() and lept_cp().
+ *  (6) For programs use the lept_fopen(), lept_fclose(), lept_calloc()
+ *      and lept_free() C library wrappers.  These work properly on Windows,
+ *      where the same DLL must perform complementary operations on
+ *      file streams (open/close) and heap memory (malloc/free).
+ *  (7) Why read and write files to temp directories?
+ *      The library needs the ability to read and write ephemeral
+ *      files to default places, both for generating debugging output
+ *      and for supporting regression tests.  Applications also need
+ *      this ability for debugging.
+ *  (8) Why do the pathname rewrite on macOS, iOS and Windows?
+ *      The goal is to have the library, and programs using the library,
+ *      run on multiple platforms without changes.  The location of
+ *      temporary files depends on the platform as well as the user's
+ *      configuration.  Temp files on some operating systems are in some
+ *      directory not known a priori.  To make everything work seamlessly on
+ *      any OS, every time you open a file for reading or writing,
+ *      use a special function such as fopenReadStream() or
+ *      fopenWriteStream(); these call genPathname() to ensure that
+ *      if it is a temp file, the correct path is used.  To indicate
+ *      that this is a temp file, the application is written with the
+ *      root directory of the path in a canonical form: "/tmp".
+ *  (9) Why is it that multi-platform directory functions like lept_mkdir()
+ *      and lept_rmdir(), as well as associated file functions like
+ *      lept_rm(), lept_mv() and lept_cp(), only work in the temp dir?
+ *      These functions were designed to provide easy manipulation of
+ *      temp files.  The restriction to temp files is for safety -- to
+ *      prevent an accidental deletion of important files.  For example,
+ *      lept_rmdir() first deletes all files in a specified subdirectory
+ *      of temp, and then removes the directory.
+ *
+ * </pre>
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config_auto.h>
+#endif  /* HAVE_CONFIG_H */
+
+#ifdef _MSC_VER
+#include <process.h>
+#include <direct.h>
+#define getcwd _getcwd  /* fix MSVC warning */
+#else
+#include <unistd.h>
+#endif   /* _MSC_VER */
+
+#ifdef _WIN32
+#include <windows.h>
+#include <fcntl.h>     /* _O_CREAT, ... */
+#include <io.h>        /* _open */
+#include <sys/stat.h>  /* _S_IREAD, _S_IWRITE */
+#else
+#include <sys/stat.h>  /* for stat, mkdir(2) */
+#include <sys/types.h>
+#endif
+
+#ifdef __APPLE__
+#include <unistd.h>
+#include <errno.h>
+#endif
+
+#include <string.h>
+#include <stddef.h>
+#include "allheaders.h"
+
+#if defined(__APPLE__) || defined(_WIN32)
+/* Rewrite paths starting with /tmp for macOS, iOS and Windows. */
+#define REWRITE_TMP
+#endif
+
+/*--------------------------------------------------------------------*
+ *                       Safe string operations                       *
+ *--------------------------------------------------------------------*/
+/*!
+ * \brief   stringNew()
+ *
+ * \param[in]    src    string to duplicate
+ * \return  newly allocated copy of %src, or NULL if %src is null or
+ *          the allocation fails
+ *
+ * <pre>
+ * Notes:
+ *      (1) A null %src only generates a warning; an allocation failure
+ *          is reported as an error.
+ *      (2) The buffer is one byte longer than the string.  Only the
+ *          string bytes are copied in, so the terminator is the extra
+ *          byte as it comes back from LEPT_CALLOC().
+ * </pre>
+ */
+char *
+stringNew(const char  *src)
+{
+l_int32  nchars;
+char    *copy;
+
+    if (src == NULL) {
+        L_WARNING("src not defined\n", __func__);
+        return NULL;
+    }
+
+    nchars = strlen(src);
+    copy = (char *)LEPT_CALLOC(nchars + 1, sizeof(char));
+    if (copy == NULL)
+        return (char *)ERROR_PTR("dest not made", __func__, NULL);
+
+        /* Copies exactly nchars bytes; the last byte is left as is */
+    stringCopy(copy, src, nchars);
+    return copy;
+}
+
+
+/*!
+ * \brief   stringCopy()
+ *
+ * \param[in]    dest    existing byte buffer, at least %n bytes long
+ * \param[in]    src     [optional] string; can be null
+ * \param[in]    n       number of bytes to write into %dest
+ * \return  0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) This has the semantics of strncpy: up to %n bytes of %src
+ *          are copied, and if %src is shorter than %n the rest of the
+ *          %n bytes in %dest are set to 0.
+ *      (2) If %src has %n or more bytes before its terminator, %dest
+ *          is NOT nul-terminated by this function.
+ *      (3) A null %src or %n < 1 is a silent no-op that returns 0.
+ *          Only a null %dest is an error.
+ *      (4) strncpy() itself is not called because valgrind complains
+ *          about use of uninitialized values.
+ * </pre>
+ */
+l_ok
+stringCopy(char        *dest,
+           const char  *src,
+           l_int32      n)
+{
+l_int32  k;
+
+    if (dest == NULL)
+        return ERROR_INT("dest not defined", __func__, 1);
+    if (src == NULL || n <= 0)
+        return 0;
+
+        /* Copy phase: stop at n bytes or at the end of src */
+    k = 0;
+    while (k < n && src[k] != '\0') {
+        dest[k] = src[k];
+        k++;
+    }
+
+        /* Padding phase: zero whatever remains of the n bytes */
+    while (k < n) {
+        dest[k] = '\0';
+        k++;
+    }
+    return 0;
+}
+
+
+/*!
+ * \brief   stringCopySegment()
+ *
+ *
+ * \param[in]    src      string
+ * \param[in]    start    byte position at start of segment
+ * \param[in]    nbytes   number of bytes in the segment; use 0 to go to end
+ * \return  copy of segment, or NULL on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) This is a variant of stringNew() that makes a new string
+ *          from a segment of the input string.  The segment is specified
+ *          by the starting position and the number of bytes.
+ *      (2) The start location %start must be within the string %src,
+ *          i.e., in [0 ... len - 1].  Consequently an empty %src is
+ *          always rejected with "invalid start".
+ *      (3) The copy is truncated to the end of the source string.
+ *          Use %nbytes <= 0 to copy to the end of %src.
+ *      (4) The truncation test is written as nbytes > len - start
+ *          rather than start + nbytes > len, so that a very large
+ *          %nbytes cannot overflow the signed sum.
+ * </pre>
+ */
+char *
+stringCopySegment(const char  *src,
+                  l_int32      start,
+                  l_int32      nbytes)
+{
+char    *dest;
+l_int32  len, navail;
+
+    if (!src)
+        return (char *)ERROR_PTR("src not defined", __func__, NULL);
+    len = strlen(src);
+    if (start < 0 || start > len - 1)
+        return (char *)ERROR_PTR("invalid start", __func__, NULL);
+
+        /* navail >= 1 here because start <= len - 1 */
+    navail = len - start;
+    if (nbytes <= 0 || nbytes > navail)  /* copy or truncate to the end */
+        nbytes = navail;
+    if ((dest = (char *)LEPT_CALLOC(nbytes + 1, sizeof(char))) == NULL)
+        return (char *)ERROR_PTR("dest not made", __func__, NULL);
+    stringCopy(dest, src + start, nbytes);
+    return dest;
+}
+
+
+/*!
+ * \brief   stringReplace()
+ *
+ * \param[out]   pdest    string copy
+ * \param[in]    src      [optional] string; can be null
+ * \return  0 if OK; 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) Frees any existing dest string
+ *      (2) Puts a copy of src string in the dest; if %src is null,
+ *          the dest is set to null.
+ *      (3) The copy of %src is made before the existing dest string is
+ *          freed, so it is safe for %src to be *pdest or to point into
+ *          it, e.g., stringReplace(&str, str + 3).
+ *      (4) The only error is a null %pdest.  If the copy cannot be made,
+ *          *pdest is set to null and 0 is still returned.
+ * </pre>
+ */
+l_ok
+stringReplace(char       **pdest,
+              const char  *src)
+{
+char  *newstr;
+
+    if (!pdest)
+        return ERROR_INT("pdest not defined", __func__, 1);
+
+        /* Copy first: src may alias the string that is about to be freed */
+    newstr = (src) ? stringNew(src) : NULL;
+
+    if (*pdest)
+        LEPT_FREE(*pdest);
+    *pdest = newstr;
+    return 0;
+}
+
+
+/*!
+ * \brief   stringLength()
+ *
+ * \param[in]    src    string can be null or NULL-terminated string
+ * \param[in]    size   number of bytes to check; e.g., size of src buffer
+ * \return  length of src in bytes; 0 if no bytes are found;
+ *                                  %size on error when NUL byte is not found.
+ *
+ * <pre>
+ * Notes:
+ *      (1) Safe implementation of strlen that only checks %size bytes
+ *          for trailing NUL.
+ *      (2) Valid returned string lengths are between 0 and size - 1.
+ *          If %size bytes are checked without finding a NUL byte, then
+ *          an error is indicated by returning %size.
+ *      (3) A null %src returns 0 silently; %size < 1 returns 0 with
+ *          an error message.
+ *      (4) The scan index has the same unsigned type as %size, so the
+ *          loop test never mixes signed and unsigned values.  The
+ *          result is narrowed to l_int32 only on return.
+ * </pre>
+ */
+l_int32
+stringLength(const char  *src,
+             size_t       size)
+{
+size_t  i;
+
+    if (!src)
+        return 0;
+    if (size < 1)
+        return ERROR_INT("size < 1; too small", __func__, 0);
+
+    for (i = 0; i < size; i++) {
+        if (src[i] == '\0')
+            return (l_int32)i;
+    }
+
+        /* Didn't find a NUL byte */
+    L_ERROR("NUL byte not found in %zu bytes\n", __func__, size);
+    return (l_int32)size;
+}
+
+
+/*!
+ * \brief   stringCat()
+ *
+ * \param[in]    dest    null-terminated byte buffer
+ * \param[in]    size    size of dest buffer
+ * \param[in]    src     string can be null or NULL-terminated string
+ * \return  number of bytes added to dest; -1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) A bounded append in the spirit of strncat, except that the
+ *          caller gives the total size of the %dest buffer instead of
+ *          the number of bytes to copy.
+ *      (2) Arg order follows the Windows function strcat_s(): the
+ *          buffer size comes before the source string.
+ *      (3) A null or empty %src is not an error; 0 is returned and
+ *          %dest is unchanged.
+ *      (4) It is an error if %dest has no NUL byte within %size bytes,
+ *          or if %dest + %src + terminator do not fit in %size bytes.
+ *          In either case %dest is not modified.
+ *      (5) The length of %src is measured with stringLength(src, size),
+ *          so at most %size bytes of %src are examined.
+ * </pre>
+ */
+l_int32
+stringCat(char        *dest,
+          size_t       size,
+          const char  *src)
+{
+l_int32      destlen, srclen;
+char        *out;
+const char  *in, *inend;
+
+    if (!dest)
+        return ERROR_INT("dest not defined", __func__, -1);
+    if (size < 1)
+        return ERROR_INT("size < 1; too small", __func__, -1);
+    if (!src)
+        return 0;
+
+    destlen = stringLength(dest, size);
+    if (destlen == size)
+        return ERROR_INT("no terminating nul byte", __func__, -1);
+
+    srclen = stringLength(src, size);
+    if (srclen == 0)
+        return 0;  /* nothing added to dest */
+
+        /* Need room for both strings plus the terminating NUL */
+    if (destlen + srclen > size - 1)
+        return ERROR_INT("dest too small for append", __func__, -1);
+
+    out = dest + destlen;
+    inend = src + srclen;
+    for (in = src; in < inend; in++)
+        *out++ = *in;
+    *out = '\0';
+    return srclen;
+}
+
+
+/*!
+ * \brief   stringConcatNew()
+ *
+ * \param[in]    first    first string in list
+ * \param[in]    ...      NULL-terminated list of strings
+ * \return  result new string concatenating the input strings, or
+ *                      NULL if first == NULL or on allocation failure
+ *
+ * <pre>
+ * Notes:
+ *      (1) The last arg in the list of strings must be NULL.
+ *      (2) Caller must free the returned string.
+ *      (3) The argument list is traversed twice: once to compute the
+ *          total length and once to copy the bytes.
+ * </pre>
+ */
+char *
+stringConcatNew(const char  *first, ...)
+{
+size_t       len;
+char        *result, *ptr;
+const char  *arg;
+va_list      args;
+
+    if (!first) return NULL;
+
+        /* Find the length of the output string */
+    va_start(args, first);
+    len = strlen(first);
+    while ((arg = va_arg(args, const char *)) != NULL)
+        len += strlen(arg);
+    va_end(args);
+
+        /* Do not write through a failed allocation */
+    if ((result = (char *)LEPT_CALLOC(len + 1, sizeof(char))) == NULL)
+        return (char *)ERROR_PTR("result not made", __func__, NULL);
+
+        /* Concatenate the args; the terminating NUL is the extra
+         * byte in the buffer, which is never written here. */
+    va_start(args, first);
+    ptr = result;
+    arg = first;
+    while (*arg)
+        *ptr++ = *arg++;
+    while ((arg = va_arg(args, const char *)) != NULL) {
+        while (*arg)
+            *ptr++ = *arg++;
+    }
+    va_end(args);
+    return result;
+}
+
+
+/*!
+ * \brief   stringJoin()
+ *
+ * \param[in]    src1    [optional] string; can be null
+ * \param[in]    src2    [optional] string; can be null
+ * \return  concatenated string, or NULL on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) A strcat that never writes into its inputs: the result is
+ *          always a newly allocated string holding %src1 followed
+ *          by %src2.
+ *      (2) Null pointers and empty strings are both accepted and are
+ *          treated as contributing no characters.  With two null
+ *          inputs the result is an empty string, not NULL.
+ *      (3) The buffer is allocated with 3 bytes more than the sum of
+ *          the two string lengths.
+ * </pre>
+ */
+char *
+stringJoin(const char  *src1,
+           const char  *src2)
+{
+char    *joined;
+l_int32  len1, len2, bufsize;
+
+    len1 = 0;
+    len2 = 0;
+    if (src1)
+        len1 = strlen(src1);
+    if (src2)
+        len2 = strlen(src2);
+    bufsize = len1 + len2 + 3;
+
+    joined = (char *)LEPT_CALLOC(bufsize, sizeof(char));
+    if (joined == NULL)
+        return (char *)ERROR_PTR("calloc fail for dest", __func__, NULL);
+
+        /* Append each available piece to the initially empty buffer */
+    if (src1)
+        stringCat(joined, bufsize, src1);
+    if (src2)
+        stringCat(joined, bufsize, src2);
+    return joined;
+}
+
+
+/*!
+ * \brief   stringJoinIP()
+ *
+ * \param[in,out]  psrc1   address of string src1; cannot be on the stack
+ * \param[in]      src2    [optional] string; can be null
+ * \return  0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) This is a safe in-place version of strcat.  The contents of
+ *          src1 is replaced by the concatenation of src1 and src2.
+ *      (2) It is not an error if either or both of the strings
+ *          are empty (""), or if the pointers to the strings (*psrc1, src2)
+ *          are null.
+ *      (3) src1 should be initialized to null or an empty string
+ *          before the first call.  Use one of these:
+ *              char *src1 = NULL;
+ *              char *src1 = stringNew("");
+ *          Then call with:
+ *              stringJoinIP(&src1, src2);
+ *      (4) The previous *psrc1 is released with LEPT_FREE() and replaced
+ *          by the joined string, which is why src1 cannot be a
+ *          stack buffer.
+ *      (5) If the joined string cannot be made, 1 is returned and
+ *          *psrc1 is left untouched, so the existing string is neither
+ *          lost nor leaked.
+ *      (6) This can also be implemented as a macro:
+ * \code
+ *              #define stringJoinIP(src1, src2) \
+ *                  {tmpstr = stringJoin((src1),(src2)); \
+ *                  LEPT_FREE(src1); \
+ *                  (src1) = tmpstr;}
+ * \endcode
+ *      (7) Another function to consider for joining many strings is
+ *          stringConcatNew().
+ * </pre>
+ */
+l_ok
+stringJoinIP(char       **psrc1,
+             const char  *src2)
+{
+char  *tmpstr;
+
+    if (!psrc1)
+        return ERROR_INT("&src1 not defined", __func__, 1);
+
+        /* On failure, keep the caller's string rather than nulling it */
+    if ((tmpstr = stringJoin(*psrc1, src2)) == NULL)
+        return ERROR_INT("joined string not made", __func__, 1);
+
+    LEPT_FREE(*psrc1);
+    *psrc1 = tmpstr;
+    return 0;
+}
+
+
+/*!
+ * \brief   stringReverse()
+ *
+ * \param[in]    src    string
+ * \return  newly allocated string with the bytes of %src in reverse
+ *          order, or NULL on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) The reversal is byte-wise.
+ *      (2) An empty %src gives a new empty string.
+ * </pre>
+ */
+char *
+stringReverse(const char  *src)
+{
+char    *rev;
+l_int32  nbytes, front, back;
+
+    if (src == NULL)
+        return (char *)ERROR_PTR("src not defined", __func__, NULL);
+
+    nbytes = strlen(src);
+    rev = (char *)LEPT_CALLOC(nbytes + 1, sizeof(char));
+    if (rev == NULL)
+        return (char *)ERROR_PTR("calloc fail for dest", __func__, NULL);
+
+        /* Fill rev forward while reading src from its last byte back */
+    front = 0;
+    for (back = nbytes - 1; back >= 0; back--)
+        rev[front++] = src[back];
+
+    return rev;
+}
+
+
+/*!
+ * \brief   strtokSafe()
+ *
+ * \param[in]    cstr      input string to be sequentially parsed;
+ *                         use NULL after the first call
+ * \param[in]    seps      a string of character separators
+ * \param[out]   psaveptr  ptr to the next char after
+ *                         the last encountered separator
+ * \return  substr         a new string that is copied from the previous
+ *                         saveptr up to but not including the next
+ *                         separator character, or NULL if end of cstr
+ *                         or on error.
+ *
+ * <pre>
+ * Notes:
+ *      (1) This is a thread-safe implementation of strtok.
+ *      (2) It has the same interface as strtok_r.
+ *      (3) It differs from strtok_r in usage in two respects:
+ *          (a) the input string is not altered
+ *          (b) each returned substring is newly allocated and must
+ *              be freed after use.
+ *      (4) Let me repeat that.  This is "safe" because the input
+ *          string is not altered and because each returned string
+ *          is newly allocated on the heap.
+ *      (5) It is here because, surprisingly, some C libraries don't
+ *          include strtok_r.
+ *      (6) Important usage points:
+ *          ~ Input the string to be parsed on the first invocation.
+ *          ~ Then input NULL after that; the value returned in saveptr
+ *            is used in all subsequent calls.
+ *      (7) On return, *psaveptr points to the first non-separator
+ *          character after the returned token, or is NULL if there
+ *          is none.  Leading separators are skipped only on the first
+ *          call, because every saved pointer already points to a
+ *          non-separator.
+ *      (8) If the substring cannot be allocated, *psaveptr is set to
+ *          NULL and NULL is returned, which ends the iteration.
+ * </pre>
+ */
+char *
+strtokSafe(char        *cstr,
+           const char  *seps,
+           char       **psaveptr)
+{
+char    *start, *next, *substr;
+size_t   nchars;
+
+    if (!seps)
+        return (char *)ERROR_PTR("seps not defined", __func__, NULL);
+    if (!psaveptr)
+        return (char *)ERROR_PTR("&saveptr not defined", __func__, NULL);
+
+    if (!cstr) {
+        start = *psaveptr;
+    } else {
+        start = cstr;
+        *psaveptr = NULL;
+    }
+    if (!start)  /* nothing to do */
+        return NULL;
+
+        /* First time, skip to the first non-sep character.  If there
+         * is none, *psaveptr has already been set to NULL above. */
+    if (cstr) {
+        start += strspn(start, seps);
+        if (*start == '\0')
+            return NULL;
+    }
+
+        /* The token extends to the next sep character, or to the
+         * end of the string if no sep is found. */
+    nchars = strcspn(start, seps);
+    if ((substr = (char *)LEPT_CALLOC(nchars + 1, sizeof(char))) == NULL) {
+        *psaveptr = NULL;
+        return (char *)ERROR_PTR("substr not made", __func__, NULL);
+    }
+    memcpy(substr, start, nchars);
+
+        /* Look for the next non-sep character.
+         * If this is the last substring, return a null saveptr. */
+    next = start + nchars;
+    next += strspn(next, seps);
+    *psaveptr = (*next != '\0') ? next : NULL;
+
+    return substr;
+}
+
+
+/*!
+ * \brief   stringSplitOnToken()
+ *
+ * \param[in]    cstr     input string to be split; not altered
+ * \param[in]    seps     a string of character separators
+ * \param[out]   phead    ptr to copy of the input string, up to
+ *                        the first separator token encountered
+ * \param[out]   ptail    ptr to copy of the part of the input string
+ *                        starting with the first non-separator character
+ *                        that occurs after the first separator is found
+ * \return  0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) Both outputs are newly allocated strings; %cstr itself
+ *          is never modified.
+ *      (2) The work is done by a single first-call invocation of
+ *          strtokSafe(): the token it returns becomes 'head', and
+ *          a copy of what its saveptr points to becomes 'tail'.
+ *      (3) Consequently 'head' is the first token, with the run of
+ *          separators that follows it dropped, and 'tail' starts at
+ *          the first non-separator character after that run.
+ *      (4) If nothing but separators (or nothing at all) follows the
+ *          first token, 'tail' is null.
+ *      (5) Both outputs are set to null before %cstr and %seps are
+ *          checked, so they are null on those error returns.
+ * </pre>
+ */
+l_ok
+stringSplitOnToken(char        *cstr,
+                   const char  *seps,
+                   char       **phead,
+                   char       **ptail)
+{
+char  *rest;
+
+    if (phead == NULL)
+        return ERROR_INT("&head not defined", __func__, 1);
+    if (ptail == NULL)
+        return ERROR_INT("&tail not defined", __func__, 1);
+    *ptail = NULL;
+    *phead = NULL;
+    if (cstr == NULL)
+        return ERROR_INT("cstr not defined", __func__, 1);
+    if (seps == NULL)
+        return ERROR_INT("seps not defined", __func__, 1);
+
+        /* With non-null cstr and seps, strtokSafe() always sets rest */
+    *phead = strtokSafe(cstr, seps, &rest);
+    if (rest != NULL)
+        *ptail = stringNew(rest);
+    return 0;
+}
+
+
+/*--------------------------------------------------------------------*
+ *                       Find and replace procs                       *
+ *--------------------------------------------------------------------*/
+/*!
+ * \brief   stringCheckForChars()
+ *
+ * \param[in]    src      input string; can be of zero length
+ * \param[in]    chars    string of chars to be searched for in %src
+ * \param[out]   pfound   1 if any characters are found; 0 otherwise
+ * \return  0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) Typical use: verify that a string is free of special
+ *          characters before handing it to some other operation.
+ *      (2) %pfound is set to 0 before the other inputs are checked,
+ *          so it is 0 on an error return caused by a null %src
+ *          or %chars.
+ *      (3) An empty %src or an empty %chars gives *pfound = 0.
+ * </pre>
+ */
+l_ok
+stringCheckForChars(const char  *src,
+                    const char  *chars,
+                    l_int32     *pfound)
+{
+    if (!pfound)
+        return ERROR_INT("&found not defined", __func__, 1);
+    *pfound = FALSE;
+    if (!src || !chars)
+        return ERROR_INT("src and chars not both defined", __func__, 1);
+
+        /* strpbrk() locates the first byte of src that is in chars */
+    if (strpbrk(src, chars) != NULL)
+        *pfound = TRUE;
+    return 0;
+}
+
+
+/*!
+ * \brief   stringRemoveChars()
+ *
+ * \param[in]    src        input string; can be of zero length
+ * \param[in]    remchars   string of chars to be removed from src
+ * \return  dest string with specified chars removed, or NULL on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) The result is always a new string.  With a null %remchars
+ *          it is simply a copy of %src.
+ *      (2) The output buffer is allocated with the full size of %src;
+ *          surviving bytes are packed to the front and the remainder
+ *          is left as allocated.
+ * </pre>
+ */
+char *
+stringRemoveChars(const char  *src,
+                  const char  *remchars)
+{
+char        *dest, *out;
+const char  *in;
+
+    if (src == NULL)
+        return (char *)ERROR_PTR("src not defined", __func__, NULL);
+    if (remchars == NULL)
+        return stringNew(src);
+
+    dest = (char *)LEPT_CALLOC(strlen(src) + 1, sizeof(char));
+    if (dest == NULL)
+        return (char *)ERROR_PTR("dest not made", __func__, NULL);
+
+        /* Keep only the bytes that do not appear in remchars */
+    out = dest;
+    for (in = src; *in != '\0'; in++) {
+        if (strchr(remchars, *in) == NULL)
+            *out++ = *in;
+    }
+
+    return dest;
+}
+
+
+/*!
+ * \brief   stringReplaceEachSubstr()
+ *
+ * \param[in]    src      input string; can be of zero length
+ * \param[in]    sub1     substring to be replaced
+ * \param[in]    sub2     substring to put in; can be ""
+ * \param[out]   pcount   [optional] the number of times that sub1
+ *                        is found in src; 0 if not found
+ * \return  dest string with substring replaced, or NULL if the
+ *              substring not found or on error.
+ *
+ * <pre>
+ * Notes:
+ *      (1) This is a string front end to arrayReplaceEachSequence(),
+ *          which does all the work on the bytes of the three strings
+ *          (terminators excluded).  The returned pointer is whatever
+ *          that function returns, cast to char *.
+ *      (2) Every non-overlapping occurrence of %sub1 in %src is
+ *          replaced by %sub2.  "Non-overlapping" means that the search
+ *          resumes after the matched characters.  For example,
+ *          replacing 'aa' by 'X' in 'baaabbb' finds one match at
+ *          position 1 and gives 'bXabbb'.
+ *      (3) To only remove each instance of sub1, use "" for sub2.
+ *          In that case a null replacement of length 0 is passed on.
+ *      (4) Returns a copy of %src if sub1 and sub2 are the same.
+ *      (5) If the input %src is binary data that can have null characters,
+ *          use arrayReplaceEachSequence() directly.
+ *      (6) %pcount, if given, is zeroed before the inputs are checked.
+ * </pre>
+ */
+char *
+stringReplaceEachSubstr(const char  *src,
+                        const char  *sub1,
+                        const char  *sub2,
+                        l_int32     *pcount)
+{
+size_t          datalen, newlen;
+const l_uint8  *newseq;
+
+    if (pcount) *pcount = 0;
+    if (!src || !sub1 || !sub2)
+        return (char *)ERROR_PTR("src, sub1, sub2 not all defined",
+                                 __func__, NULL);
+
+        /* An empty replacement is passed as (NULL, 0): removal only */
+    newlen = strlen(sub2);
+    newseq = (newlen > 0) ? (const l_uint8 *)sub2 : NULL;
+
+    return (char *)arrayReplaceEachSequence(
+                           (const l_uint8 *)src, strlen(src),
+                           (const l_uint8 *)sub1, strlen(sub1),
+                           newseq, newlen, &datalen, pcount);
+}
+
+
+/*!
+ * \brief   stringReplaceSubstr()
+ *
+ * \param[in]      src      input string; can be of zero length
+ * \param[in]      sub1     substring to be replaced
+ * \param[in]      sub2     substring to put in; can be ""
+ * \param[in,out]  ploc     [optional] input start location for search;
+ *                          returns the loc after replacement
+ * \param[out]     pfound   [optional] 1 if sub1 is found; 0 otherwise
+ * \return  dest string with substring replaced, or NULL on error.
+ *
+ * <pre>
+ * Notes:
+ *      (1) Replaces the first instance.
+ *      (2) To remove sub1 without replacement, use "" for sub2.
+ *      (3) Returns a copy of %src if either no instance of %sub1 is found,
+ *          or if %sub1 and %sub2 are the same.
+ *      (4) If %ploc == NULL, the search will start at the beginning of %src.
+ *          If %ploc != NULL, *ploc must be initialized to the byte offset
+ *          within %src from which the search starts.  To search the
+ *          string from the beginning, set %loc = 0 and input &loc.
+ *          After finding %sub1 and replacing it with %sub2, %loc will be
+ *          returned as the next position after %sub2 in the output string.
+ *      (5) The input *ploc must be in [0 ... strlen(src)].  A value
+ *          outside that range would start the search outside the
+ *          string, so it is rejected as an error.  This check is made
+ *          only when %sub1 and %sub2 differ.
+ *      (6) *ploc is not changed if %sub1 is not found or on error.
+ *      (7) Note that the output string also includes all the characters
+ *          from the input string that occur after the single substitution.
+ * </pre>
+ */
+char *
+stringReplaceSubstr(const char  *src,
+                    const char  *sub1,
+                    const char  *sub2,
+                    l_int32     *ploc,
+                    l_int32     *pfound)
+{
+const char  *ptr;
+char        *dest;
+l_int32      nsrc, nsub1, nsub2, len, npre, ntail, loc;
+
+    if (pfound) *pfound = 0;
+    if (!src || !sub1 || !sub2)
+        return (char *)ERROR_PTR("src, sub1, sub2 not all defined",
+                                 __func__, NULL);
+
+    if (!strcmp(sub1, sub2))
+        return stringNew(src);
+
+        /* Never form a search pointer outside of src */
+    nsrc = strlen(src);
+    loc = (ploc) ? *ploc : 0;
+    if (loc < 0 || loc > nsrc)
+        return (char *)ERROR_PTR("invalid loc", __func__, NULL);
+
+    if ((ptr = strstr(src + loc, sub1)) == NULL)
+        return stringNew(src);
+    if (pfound) *pfound = 1;
+
+    nsub1 = strlen(sub1);
+    nsub2 = strlen(sub2);
+    len = nsrc + nsub2 - nsub1;
+    if ((dest = (char *)LEPT_CALLOC(len + 1, sizeof(char))) == NULL)
+        return (char *)ERROR_PTR("dest not made", __func__, NULL);
+
+        /* Assemble: [src before match] [sub2] [src after match + NUL] */
+    npre = ptr - src;
+    ntail = nsrc - npre - nsub1;
+    memcpy(dest, src, npre);
+    memcpy(dest + npre, sub2, nsub2);
+    memcpy(dest + npre + nsub2, ptr + nsub1, ntail + 1);
+    if (ploc) *ploc = npre + nsub2;
+    return dest;
+}
+
+
+/*!
+ * \brief   stringFindEachSubstr()
+ *
+ * \param[in]    src        input string; can be of zero length
+ * \param[in]    sub        substring to be searched for
+ * \return  dna of offsets where the sequence is found, or NULL if
+ *              none are found or on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) This finds every non-overlapping occurrence in %src of %sub.
+ *          After it finds each match, it moves forward in %src by the length
+ *          of %sub before continuing the search.  So for example,
+ *          if you search for the sequence 'aa' in the data 'baaabbb',
+ *          you find one match at position 1.
+
+ * </pre>
+ */
+L_DNA *
+stringFindEachSubstr(const char  *src,
+                     const char  *sub)
+{
+size_t  srclen, sublen;
+
+    if (src == NULL || sub == NULL)
+        return (L_DNA *)ERROR_PTR("src, sub not both defined", __func__, NULL);
+
+        /* Search the strings as byte arrays; the lengths given
+         * exclude the terminating nulls. */
+    srclen = strlen(src);
+    sublen = strlen(sub);
+    return arrayFindEachSequence((const l_uint8 *)src, srclen,
+                                 (const l_uint8 *)sub, sublen);
+}
+
+
+/*!
+ * \brief   stringFindSubstr()
+ *
+ * \param[in]    src     input string; can be of zero length
+ * \param[in]    sub     substring to be searched for; must not be empty
+ * \param[out]   ploc    [optional] location of substring in src
+ * \return  1 if found; 0 if not found or on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) This is a wrapper around strstr().  It finds the first
+ *          instance of %sub in %src.  If the substring is not found
+ *          and the location is returned, it has the value -1.
+ *      (2) Both %src and %sub must be defined, and %sub must have
+ *          length of at least 1.
+ * </pre>
+ */
+l_int32
+stringFindSubstr(const char  *src,
+                 const char  *sub,
+                 l_int32     *ploc)
+{
+const char  *match;
+
+        /* The location stays at -1 unless a match is found below */
+    if (ploc) *ploc = -1;
+    if (src == NULL || sub == NULL)
+        return ERROR_INT("src and sub not both defined", __func__, 0);
+    if (sub[0] == '\0')
+        return ERROR_INT("substring length 0", __func__, 0);
+    if (src[0] == '\0')  /* nothing to search; not an error */
+        return 0;
+
+    match = strstr(src, sub);
+    if (match == NULL)  /* not found */
+        return 0;
+
+    if (ploc)
+        *ploc = match - src;
+    return 1;
+}
+
+
+/*!
+ * \brief   arrayReplaceEachSequence()
+ *
+ * \param[in]    datas       source byte array
+ * \param[in]    dataslen    length of source data, in bytes
+ * \param[in]    seq         subarray of bytes to find in source data
+ * \param[in]    seqlen      length of subarray, in bytes
+ * \param[in]    newseq      replacement subarray; can be null
+ * \param[in]    newseqlen   length of replacement subarray, in bytes
+ * \param[out]   pdatadlen   length of dest byte array, in bytes
+ * \param[out]   pcount      [optional] the number of times that %seq
+ *                           is found in %datas; 0 if not found
+ * \return  datad   with all subarrays replaced (or removed)
+ *
+ * <pre>
+ * Notes:
+ *      (1) The byte arrays %datas, %seq and %newseq are not C strings,
+ *          because they can contain null bytes.  Therefore, for each
+ *          we must give the length of the array.
+ *      (2) If %newseq == NULL, this just removes all instances of %seq.
+ *          Otherwise, it replaces every non-overlapping occurrence of
+ *          %seq in %datas with %newseq. A new array %datad and its
+ *          size are returned.  See arrayFindEachSequence() for more
+ *          details on finding non-overlapping occurrences.
+ *      (3) If no instances of %seq are found, this returns a copy of %datas.
+ *      (4) The returned %datad is null terminated.
+ *      (5) Can use stringReplaceEachSubstr() if using C strings.
+ * </pre>
+ */
+l_uint8 *
+arrayReplaceEachSequence(const l_uint8  *datas,
+                         size_t          dataslen,
+                         const l_uint8  *seq,
+                         size_t          seqlen,
+                         const l_uint8  *newseq,
+                         size_t          newseqlen,
+                         size_t         *pdatadlen,
+                         l_int32        *pcount)
+{
+l_uint8  *datad;
+size_t    newsize, si, di, nkeep;
+l_int32   n, index, loc;
+L_DNA    *da;
+
+    if (pcount) *pcount = 0;
+    if (!datas || !seq)
+        return (l_uint8 *)ERROR_PTR("datas & seq not both defined",
+                                    __func__, NULL);
+    if (!pdatadlen)
+        return (l_uint8 *)ERROR_PTR("&datadlen not defined", __func__, NULL);
+    *pdatadlen = 0;
+
+        /* Identify the locations of the sequence.  If there are none,
+         * return a copy of %datas.  The size is reported only if the
+         * copy was made, so that a NULL return always has size 0. */
+    if ((da = arrayFindEachSequence(datas, dataslen, seq, seqlen)) == NULL) {
+        if ((datad = l_binaryCopy(datas, dataslen)) != NULL)
+            *pdatadlen = dataslen;
+        return datad;
+    }
+
+        /* Allocate the output data; insure null termination.
+         * The difference (newseqlen - seqlen) is taken in unsigned
+         * arithmetic and may wrap, but the sum is exact because the
+         * n matches are disjoint and all lie within %datas. */
+    n = l_dnaGetCount(da);
+    if (pcount) *pcount = n;
+    if (!newseq) newseqlen = 0;
+    newsize = dataslen + n * (newseqlen - seqlen) + 4;
+    if ((datad = (l_uint8 *)LEPT_CALLOC(newsize, sizeof(l_uint8))) == NULL) {
+        l_dnaDestroy(&da);
+        return (l_uint8 *)ERROR_PTR("datad not made", __func__, NULL);
+    }
+
+        /* Build the output from alternating pieces: the bytes of %datas
+         * that precede each match, followed by the new sequence.
+         * Here si is the read position in datas and di is the write
+         * position in datad. */
+    si = di = 0;
+    for (index = 0; index < n; index++) {
+        l_dnaGetIValue(da, index, &loc);
+        nkeep = loc - si;  /* unmatched bytes before this match */
+        memcpy(datad + di, datas + si, nkeep);
+        di += nkeep;
+        if (newseqlen > 0) {  /* add new sequence to datad */
+            memcpy(datad + di, newseq, newseqlen);
+            di += newseqlen;
+        }
+        si = loc + seqlen;  /* jump over the matched sequence in datas */
+    }
+
+        /* Copy whatever follows the last match */
+    nkeep = dataslen - si;
+    memcpy(datad + di, datas + si, nkeep);
+    di += nkeep;
+
+    *pdatadlen = di;
+    l_dnaDestroy(&da);
+    return datad;
+}
+
+
+/*!
+ * \brief   arrayFindEachSequence()
+ *
+ * \param[in]    data       byte array
+ * \param[in]    datalen    length of data, in bytes
+ * \param[in]    sequence   subarray of bytes to find in data
+ * \param[in]    seqlen     length of sequence, in bytes
+ * \return  dna of offsets where the sequence is found, or NULL if
+ *              none are found or on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) The byte arrays %data and %sequence are not C strings,
+ *          because they can contain null bytes.  Therefore, for each
+ *          we must give the length of the array.
+ *      (2) This finds every non-overlapping occurrence in %data of %sequence.
+ *          After it finds each match, it moves forward by the length
+ *          of the sequence before continuing the search.  So for example,
+ *          if you search for the sequence 'aa' in the data 'baaabbb',
+ *          you find one match at position 1.
+ * </pre>
+ */
+L_DNA *
+arrayFindEachSequence(const l_uint8  *data,
+                      size_t          datalen,
+                      const l_uint8  *sequence,
+                      size_t          seqlen)
+{
+l_int32  start, offset, realoffset, found;
+L_DNA   *da;
+
+    if (!data || !sequence)
+        return (L_DNA *)ERROR_PTR("data & sequence not both defined",
+                                  __func__, NULL);
+
+        /* An empty sequence, or one that is longer than the data,
+         * cannot be found.  This is "none found", not an error. */
+    if (seqlen == 0 || seqlen > datalen)
+        return NULL;
+
+    if ((da = l_dnaCreate(0)) == NULL)
+        return (L_DNA *)ERROR_PTR("da not made", __func__, NULL);
+
+        /* Search repeatedly in the part of %data that follows the
+         * previous match, so that the matches do not overlap. */
+    start = 0;
+    while (1) {
+        arrayFindSequence(data + start, datalen - start, sequence, seqlen,
+                          &offset, &found);
+        if (found == FALSE)
+            break;
+
+        realoffset = start + offset;  /* offset from the start of data */
+        l_dnaAddNumber(da, realoffset);
+        start = realoffset + seqlen;
+        if ((size_t)start + seqlen > datalen)  /* too little data remains */
+            break;
+    }
+
+        /* Return NULL rather than an empty array if nothing was found */
+    if (l_dnaGetCount(da) == 0)
+        l_dnaDestroy(&da);
+    return da;
+}
+
+
+/*!
+ * \brief   arrayFindSequence()
+ *
+ * \param[in]    data       byte array
+ * \param[in]    datalen    length of data, in bytes
+ * \param[in]    sequence   subarray of bytes to find in data
+ * \param[in]    seqlen     length of sequence, in bytes
+ * \param[out]   poffset    offset from beginning of
+ *                          data where the sequence begins
+ * \param[out]   pfound     1 if sequence is found; 0 otherwise
+ * \return  0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) The byte arrays 'data' and 'sequence' are in general not C strings,
+ *          because they can contain null bytes.  Therefore, for each
+ *          we must give the length of the array.
+ *      (2) This searches for the first occurrence in %data of %sequence,
+ *          which consists of %seqlen bytes.  The parameter %seqlen
+ *          must not exceed the actual length of the %sequence byte array.
+ *      (3) If either byte array is a C string, cast the array to
+ *          (const l_uint8 *) and use strlen() on the string for its length.
+ *      (4) If the sequence is not found, the offset will be 0, so you
+ *          must check %found.
+ * </pre>
+ */
+l_ok
+arrayFindSequence(const l_uint8  *data,
+                  size_t          datalen,
+                  const l_uint8  *sequence,
+                  size_t          seqlen,
+                  l_int32        *poffset,
+                  l_int32        *pfound)
+{
+size_t  i, nstarts;
+
+    if (poffset) *poffset = 0;
+    if (pfound) *pfound = FALSE;
+    if (!data || !sequence)
+        return ERROR_INT("data & sequence not both defined", __func__, 1);
+    if (!poffset || !pfound)
+        return ERROR_INT("&offset and &found not defined", __func__, 1);
+
+        /* No match is possible for an empty sequence or for one that
+         * is longer than the data.  Testing this first also keeps the
+         * unsigned subtraction below from wrapping around, which
+         * could otherwise lead to reading beyond the end of %data. */
+    if (seqlen == 0 || seqlen > datalen)
+        return 0;
+
+        /* Try each position where the full sequence still fits.
+         * The offset is returned as an l_int32, so a match at a larger
+         * position than that type can hold cannot be reported exactly. */
+    nstarts = datalen - seqlen + 1;
+    for (i = 0; i < nstarts; i++) {
+        if (memcmp(data + i, sequence, seqlen) == 0) {
+            *poffset = i;
+            *pfound = TRUE;
+            break;
+        }
+    }
+    return 0;
+}
+
+
+/*--------------------------------------------------------------------*
+ *                             Safe realloc                           *
+ *--------------------------------------------------------------------*/
+/*!
+ * \brief   reallocNew()
+ *
+ * \param[in,out]  pindata    nulls indata before reallocing
+ * \param[in]      oldsize    size of input data to be copied, in bytes
+ * \param[in]      newsize    size of buffer to be reallocated in bytes
+ * \return  ptr to new data, or NULL on error
+ *
+ *  Action: !N.B. (3) and (4)!
+ *      (1) Allocates memory, initialized to 0
+ *      (2) Copies as much of the input data as possible
+ *          to the new block, truncating the copy if necessary
+ *      (3) Frees the input data
+ *      (4) Zeroes the input data ptr
+ *
+ * <pre>
+ * Notes:
+ *      (1) If newsize == 0, frees input data and nulls ptr
+ *      (2) If input data is null, only callocs new memory
+ *      (3) This differs from realloc in that it always allocates
+ *          new memory (if newsize > 0) and initializes it to 0,
+ *          it requires the amount of old data to be copied,
+ *          and it takes the address of the input ptr and
+ *          nulls the handle.
+ * </pre>
+ */
+void *
+reallocNew(void  **pindata,
+           size_t  oldsize,
+           size_t  newsize)
+{
+size_t   ncopy;
+void    *olddata;
+void    *fresh;
+
+    if (!pindata)
+        return ERROR_PTR("input data not defined", __func__, NULL);
+    olddata = *pindata;
+
+        /* Nonstandard usage: a request for 0 bytes just releases
+         * the old block, if there is one, and nulls the handle. */
+    if (newsize == 0) {
+        if (olddata) {
+            LEPT_FREE(olddata);
+            *pindata = NULL;
+        }
+        return NULL;
+    }
+
+        /* A new zeroed block is always allocated.  If that fails,
+         * the old block and the handle are left as they were. */
+    fresh = (void *)LEPT_CALLOC(1, newsize);
+    if (fresh == NULL)
+        return ERROR_PTR("newdata not made", __func__, NULL);
+
+        /* Standard usage: carry over as much old data as fits, then
+         * free it and null the handle.  With no old data (nonstandard
+         * usage) the new block is returned as is. */
+    if (olddata) {
+        ncopy = (oldsize < newsize) ? oldsize : newsize;
+        memcpy(fresh, olddata, ncopy);
+        LEPT_FREE(olddata);
+        *pindata = NULL;
+    }
+    return fresh;
+}
+
+
+/*--------------------------------------------------------------------*
+ *                 Read and write between file and memory             *
+ *--------------------------------------------------------------------*/
+/*!
+ * \brief   l_binaryRead()
+ *
+ * \param[in]    filename
+ * \param[out]   pnbytes    number of bytes read
+ * \return  data, or NULL on error
+ */
+l_uint8 *
+l_binaryRead(const char  *filename,
+             size_t      *pnbytes)
+{
+l_uint8  *contents;
+FILE     *stream;
+
+    if (pnbytes == NULL)
+        return (l_uint8 *)ERROR_PTR("pnbytes not defined", __func__, NULL);
+    *pnbytes = 0;
+    if (filename == NULL)
+        return (l_uint8 *)ERROR_PTR("filename not defined", __func__, NULL);
+
+    stream = fopenReadStream(filename);
+    if (stream == NULL)
+        return (l_uint8 *)ERROR_PTR_1("file stream not opened",
+                                      filename, __func__, NULL);
+
+        /* The stream reader does the work; the stream opened here
+         * is closed here, whether or not the read succeeded. */
+    contents = l_binaryReadStream(stream, pnbytes);
+    fclose(stream);
+    return contents;
+}
+
+
+/*!
+ * \brief   l_binaryReadStream()
+ *
+ * \param[in]    fp        file stream opened to read; can be stdin
+ * \param[out]   pnbytes   number of bytes read
+ * \return  null-terminated array, or NULL on error; reading 0 bytes
+ *          is not an error
+ *
+ * <pre>
+ * Notes:
+ *      (1) The returned array is terminated with a null byte so that it can
+ *          be used to read ascii data from a file into a proper C string.
+ *      (2) This can be used to capture data that is piped in via stdin,
+ *          because it does not require seeking within the file.
+ *      (3) For example, you can read an image from stdin into memory
+ *          using shell redirection, with one of these shell commands:
+ * \code
+ *             cat <imagefile> | readprog
+ *             readprog < <imagefile>
+ * \endcode
+ *          where readprog is:
+ * \code
+ *             l_uint8 *data = l_binaryReadStream(stdin, &nbytes);
+ *             Pix *pix = pixReadMem(data, nbytes);
+ * \endcode
+ * </pre>
+ */
+l_uint8 *
+l_binaryReadStream(FILE    *fp,
+                   size_t  *pnbytes)
+{
+l_uint8    *data;
+l_int32     seekable, navail, nadd, nread;
+L_BBUFFER  *bb;
+
+    if (!pnbytes)
+        return (l_uint8 *)ERROR_PTR("&nbytes not defined", __func__, NULL);
+    *pnbytes = 0;
+    if (!fp)
+        return (l_uint8 *)ERROR_PTR("fp not defined", __func__, NULL);
+
+        /* Use the reported position as the test for a seekable stream
+         * that is at its start: ftell() then gives 0.  In that case,
+         * use l_binaryReadSelectStream() to determine the size of the
+         * data to be read in advance.  Any other result, including a
+         * failure of ftell(), falls through to incremental reading
+         * from the current position. */
+    seekable = (ftell(fp) == 0) ? 1 : 0;
+    if (seekable)
+        return l_binaryReadSelectStream(fp, 0, 0, pnbytes);
+
+        /* Otherwise, use the bbuffer to realloc memory as needed
+         * during reading. */
+    if ((bb = bbufferCreate(NULL, 4096)) == NULL)
+        return (l_uint8 *)ERROR_PTR("bb not made", __func__, NULL);
+    while (1) {
+            /* Keep room for a full read of 4096 bytes.  If the buffer
+             * cannot be grown, stop: reading into it would overrun. */
+        navail = bb->nalloc - bb->n;
+        if (navail < 4096) {
+            nadd = L_MAX(bb->nalloc, 4096);
+            if (bbufferExtendArray(bb, nadd)) {
+                bbufferDestroy(&bb);
+                return (l_uint8 *)ERROR_PTR("bb not extended",
+                                            __func__, NULL);
+            }
+        }
+        nread = fread((void *)(bb->array + bb->n), 1, 4096, fp);
+        bb->n += nread;
+        if (nread != 4096) break;  /* short read: end of data or error */
+    }
+
+        /* Copy the data to a new array sized for the data, because
+         * the bbuffer array can be nearly twice the size we need.
+         * The extra byte stays zero, for null termination. */
+    if ((data = (l_uint8 *)LEPT_CALLOC(bb->n + 1, sizeof(l_uint8))) != NULL) {
+        memcpy(data, bb->array, bb->n);
+        *pnbytes = bb->n;
+    } else {
+        L_ERROR("calloc fail for data\n", __func__);
+    }
+
+    bbufferDestroy(&bb);
+    return data;
+}
+
+
+/*!
+ * \brief   l_binaryReadSelect()
+ *
+ * \param[in]    filename
+ * \param[in]    start     first byte to read
+ * \param[in]    nbytes    number of bytes to read; use 0 to read to end of file
+ * \param[out]   pnread    number of bytes actually read
+ * \return  data, or NULL on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) The returned array is terminated with a null byte so that it can
+ *          be used to read ascii data from a file into a proper C string.
+ * </pre>
+ */
+l_uint8 *
+l_binaryReadSelect(const char  *filename,
+                   size_t       start,
+                   size_t       nbytes,
+                   size_t      *pnread)
+{
+l_uint8  *contents;
+FILE     *stream;
+
+    if (pnread == NULL)
+        return (l_uint8 *)ERROR_PTR("pnread not defined", __func__, NULL);
+    *pnread = 0;
+    if (filename == NULL)
+        return (l_uint8 *)ERROR_PTR("filename not defined", __func__, NULL);
+
+    stream = fopenReadStream(filename);
+    if (stream == NULL)
+        return (l_uint8 *)ERROR_PTR_1("file stream not opened",
+                                      filename, __func__, NULL);
+
+        /* Read the selected range from the stream, then close it */
+    contents = l_binaryReadSelectStream(stream, start, nbytes, pnread);
+    fclose(stream);
+    return contents;
+}
+
+
+/*!
+ * \brief   l_binaryReadSelectStream()
+ *
+ * \param[in]    fp       file stream
+ * \param[in]    start    first byte to read
+ * \param[in]    nbytes   number of bytes to read; use 0 to read to end of file
+ * \param[out]   pnread   number of bytes actually read
+ * \return  null-terminated array, or NULL on error; reading 0 bytes
+ *          is not an error
+ *
+ * <pre>
+ * Notes:
+ *      (1) The returned array is terminated with a null byte so that it can
+ *          be used to read ascii data from a file into a proper C string.
+ *          If the file to be read is empty and %start == 0, an array
+ *          with a single null byte is returned.
+ *      (2) Side effect: the stream pointer is re-positioned to the
+ *          beginning of the file.
+ * </pre>
+ */
+l_uint8 *
+l_binaryReadSelectStream(FILE    *fp,
+                         size_t   start,
+                         size_t   nbytes,
+                         size_t  *pnread)
+{
+l_uint8  *data;
+long      endpos;
+size_t    bytesleft, bytestoread, nread, filebytes;
+
+    if (!pnread)
+        return (l_uint8 *)ERROR_PTR("&nread not defined", __func__, NULL);
+    *pnread = 0;
+    if (!fp)
+        return (l_uint8 *)ERROR_PTR("stream not defined", __func__, NULL);
+
+        /* Find the size of the file from the position of its end.
+         * This fails on a stream that cannot be positioned; the
+         * failure value of ftell() must not be taken as a size,
+         * because as a size_t it would be enormous. */
+    endpos = (fseek(fp, 0, SEEK_END) == 0) ? ftell(fp) : -1;
+    fseek(fp, 0, SEEK_SET);
+    if (endpos < 0)
+        return (l_uint8 *)ERROR_PTR("file size not found", __func__, NULL);
+    filebytes = endpos;
+
+        /* Verify and adjust the parameters if necessary */
+    if (start > filebytes) {
+        L_ERROR("start = %zu but filebytes = %zu\n", __func__,
+                start, filebytes);
+        return NULL;
+    }
+    if (filebytes == 0)  /* start == 0; nothing to read; return null byte */
+        return (l_uint8 *)LEPT_CALLOC(1, 1);
+    bytesleft = filebytes - start;  /* can be 0, if start == filebytes */
+    if (nbytes == 0) nbytes = bytesleft;
+    bytestoread = (bytesleft >= nbytes) ? nbytes : bytesleft;
+
+        /* Read the data; the extra byte stays zero, for null termination */
+    if ((data = (l_uint8 *)LEPT_CALLOC(1, bytestoread + 1)) == NULL)
+        return (l_uint8 *)ERROR_PTR("calloc fail for data", __func__, NULL);
+    if (fseek(fp, start, SEEK_SET) != 0) {
+        LEPT_FREE(data);
+        fseek(fp, 0, SEEK_SET);
+        return (l_uint8 *)ERROR_PTR("seek to start failed", __func__, NULL);
+    }
+    nread = fread(data, 1, bytestoread, fp);
+    if (nbytes != nread)
+        L_INFO("%zu bytes requested; %zu bytes read\n", __func__,
+               nbytes, nread);
+    *pnread = nread;
+    fseek(fp, 0, SEEK_SET);  /* leave the stream at the beginning */
+    return data;
+}
+
+
+/*!
+ * \brief   l_binaryWrite()
+ *
+ * \param[in]    filename     output file
+ * \param[in]    operation    "w" for write; "a" for append
+ * \param[in]    data         binary data to be written
+ * \param[in]    nbytes       size of data array
+ * \return  0 if OK; 1 on error
+ */
+l_ok
+l_binaryWrite(const char  *filename,
+              const char  *operation,
+              const void  *data,
+              size_t       nbytes)
+{
+char    actualOperation[20];
+l_int32 closeval;
+size_t  nwritten;
+FILE   *fp;
+
+    if (!filename)
+        return ERROR_INT("filename not defined", __func__, 1);
+    if (!operation)
+        return ERROR_INT("operation not defined", __func__, 1);
+    if (!data)
+        return ERROR_INT("data not defined", __func__, 1);
+    if (nbytes == 0)
+        return ERROR_INT("nbytes must be > 0", __func__, 1);
+
+    if (strcmp(operation, "w") && strcmp(operation, "a"))
+        return ERROR_INT("operation not one of {'w','a'}", __func__, 1);
+
+        /* The 'b' flag to fopen() is ignored for all POSIX
+         * conforming systems.  However, Windows needs the 'b' flag. */
+    stringCopy(actualOperation, operation, 2);
+    stringCat(actualOperation, 20, "b");
+
+    if ((fp = fopenWriteStream(filename, actualOperation)) == NULL)
+        return ERROR_INT_1("stream not opened", filename, __func__, 1);
+
+        /* A short write, or a failure to flush on close, means that
+         * the file does not hold all the data.  Report it instead of
+         * returning success.  The stream is closed in either case. */
+    nwritten = fwrite(data, 1, nbytes, fp);
+    closeval = fclose(fp);
+    if (nwritten != nbytes)
+        return ERROR_INT_1("not all bytes written", filename, __func__, 1);
+    if (closeval != 0)
+        return ERROR_INT_1("stream not closed cleanly", filename,
+                           __func__, 1);
+    return 0;
+}
+
+
+/*!
+ * \brief   nbytesInFile()
+ *
+ * \param[in]    filename
+ * \return  nbytes in file; 0 on error
+ */
+size_t
+nbytesInFile(const char  *filename)
+{
+size_t  filesize;
+FILE   *stream;
+
+    if (filename == NULL)
+        return ERROR_INT("filename not defined", __func__, 0);
+    stream = fopenReadStream(filename);
+    if (stream == NULL)
+        return ERROR_INT_1("stream not opened", filename, __func__, 0);
+
+        /* Measure through the open stream, then close it */
+    filesize = fnbytesInFile(stream);
+    fclose(stream);
+    return filesize;
+}
+
+
+/*!
+ * \brief   fnbytesInFile()
+ *
+ * \param[in]    fp    file stream
+ * \return  nbytes in file; 0 on error
+ */
+size_t
+fnbytesInFile(FILE  *fp)
+{
+l_int64  pos, nbytes;
+
+    if (!fp)
+        return ERROR_INT("stream not open", __func__, 0);
+
+    pos = ftell(fp);          /* initial position */
+    if (pos < 0)
+        return ERROR_INT("seek position must be >= 0", __func__, 0);
+
+        /* The size is the position of the end of the file.  Go back
+         * to the initial position before returning, also on failure,
+         * so that the caller's stream is not left at the end. */
+    nbytes = (fseek(fp, 0, SEEK_END) == 0) ? ftell(fp) : -1;
+    fseek(fp, pos, SEEK_SET);
+    if (nbytes < 0)
+        return ERROR_INT("nbytes is < 0", __func__, 0);
+    return nbytes;
+}
+
+
+/*--------------------------------------------------------------------*
+ *                     Copy and compare in memory                     *
+ *--------------------------------------------------------------------*/
+/*!
+ * \brief   l_binaryCopy()
+ *
+ * \param[in]    datas
+ * \param[in]    size    of data array
+ * \return  datad on heap, or NULL on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) We add 4 bytes to the zeroed output because in some cases
+ *          (e.g., string handling) it is important to have the data
+ *          be null terminated.  This guarantees that after the memcpy,
+ *          the result is automatically null terminated.
+ * </pre>
+ */
+l_uint8 *
+l_binaryCopy(const l_uint8  *datas,
+             size_t          size)
+{
+l_uint8  *copy;
+
+    if (datas == NULL)
+        return (l_uint8 *)ERROR_PTR("datas not defined", __func__, NULL);
+
+        /* The 4 bytes beyond the copied data stay zero, so the
+         * result is null terminated. */
+    copy = (l_uint8 *)LEPT_CALLOC(size + 4, sizeof(l_uint8));
+    if (copy == NULL)
+        return (l_uint8 *)ERROR_PTR("datad not made", __func__, NULL);
+    memcpy(copy, datas, size);
+    return copy;
+}
+
+
+/*!
+ * \brief   l_binaryCompare()
+ *
+ * \param[in]    data1
+ * \param[in]    size1   of data1, in bytes
+ * \param[in]    data2
+ * \param[in]    size2   of data2, in bytes
+ * \param[out]   psame   1 if the same, 0 if different
+ * \return  0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) The arrays are the same if they have the same size and the
+ *          same bytes.  Two arrays of size 0 compare as the same.
+ *      (2) This can also be used to compare C strings str1 and str2.
+ *          If the string lengths are not known, use strlen():
+ *            l_binaryCompare((l_uint8 *)str1, strlen(str1),
+ *                            (l_uint8 *)str2, strlen(str2), &same);
+ * </pre>
+ */
+l_ok
+l_binaryCompare(const l_uint8  *data1,
+                size_t          size1,
+                const l_uint8  *data2,
+                size_t          size2,
+                l_int32        *psame)
+{
+    if (!psame)
+        return ERROR_INT("&same not defined", __func__, 1);
+    *psame = FALSE;
+    if (!data1 || !data2)
+        return ERROR_INT("data1 and data2 not both defined", __func__, 1);
+
+        /* Different sizes: different data, without looking at it */
+    if (size1 != size2) return 0;
+
+        /* Compare over the full size_t range; a 32-bit signed index
+         * would overflow on arrays larger than it can count. */
+    if (memcmp(data1, data2, size1) == 0)
+        *psame = TRUE;
+    return 0;
+}
+
+
+/*--------------------------------------------------------------------*
+ *                         File copy operations                       *
+ *--------------------------------------------------------------------*/
+/*!
+ * \brief   fileCopy()
+ *
+ * \param[in]    srcfile   copy from this file
+ * \param[in]    newfile   copy to this file
+ * \return  0 if OK, 1 on error
+ */
+l_ok
+fileCopy(const char  *srcfile,
+         const char  *newfile)
+{
+l_int32   status;
+size_t    nbytes;
+l_uint8  *contents;
+
+    if (srcfile == NULL)
+        return ERROR_INT("srcfile not defined", __func__, 1);
+    if (newfile == NULL)
+        return ERROR_INT("newfile not defined", __func__, 1);
+
+        /* Read the whole source file into memory, then write it out */
+    contents = l_binaryRead(srcfile, &nbytes);
+    if (contents == NULL)
+        return ERROR_INT("data not returned", __func__, 1);
+    status = l_binaryWrite(newfile, "w", contents, nbytes);
+    LEPT_FREE(contents);
+    return status;
+}
+
+
+/*!
+ * \brief   fileConcatenate()
+ *
+ * \param[in]    srcfile   append data from this file
+ * \param[in]    destfile  add data to this file
+ * \return  0 if OK, 1 on error
+ */
+l_ok
+fileConcatenate(const char  *srcfile,
+                const char  *destfile)
+{
+l_int32   ret;
+size_t    nbytes;
+l_uint8  *data;
+
+    if (!srcfile)
+        return ERROR_INT("srcfile not defined", __func__, 1);
+    if (!destfile)
+        return ERROR_INT("destfile not defined", __func__, 1);
+
+    if ((data = l_binaryRead(srcfile, &nbytes)) == NULL)
+        return ERROR_INT("data not returned", __func__, 1);
+
+        /* Report a failure to append, rather than always returning 0.
+         * l_binaryWrite() rejects a request to write 0 bytes, so an
+         * empty source file is treated as having nothing to append,
+         * which is not an error. */
+    ret = (nbytes > 0) ? l_binaryWrite(destfile, "a", data, nbytes) : 0;
+    LEPT_FREE(data);
+    return ret;
+}
+
+
+/*!
+ * \brief   fileAppendString()
+ *
+ * \param[in]    filename
+ * \param[in]    str       string to append to file
+ * \return  0 if OK, 1 on error
+ */
+l_ok
+fileAppendString(const char  *filename,
+                 const char  *str)
+{
+FILE  *fp;
+
+    if (!filename)
+        return ERROR_INT("filename not defined", __func__, 1);
+    if (!str)
+        return ERROR_INT("str not defined", __func__, 1);
+
+    if ((fp = fopenWriteStream(filename, "a")) == NULL)
+        return ERROR_INT_1("stream not opened", filename, __func__, 1);
+    fprintf(fp, "%s", str);
+    fclose(fp);
+    return 0;
+}
+
+
+/*--------------------------------------------------------------------*
+ *                         File split operations                      *
+ *--------------------------------------------------------------------*/
+/*!
+ * \brief   fileSplitLinesUniform()
+ *
+ * \param[in]    filename      input file
+ * \param[in]    n             number of output files (>= 1)
+ * \param[in]    save_empty    1 to save empty lines; 0 to remove them
+ * \param[in]    rootpath      root pathname of output files
+ * \param[in]    ext           output extension, including the '.'; can be NULL
+ * \return  0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) This splits an input text file into %n files with roughly
+ *          equal numbers of text lines in each file.
+ *      (2) if %save_empty == 1, empty lines are included, and concatenation
+ *          of the text in the split files will be identical to the original.
+ *      (3) The output filenames are in the form:
+ *               <rootpath>_N<ext>, N = 0, ... n - 1
+ *      (4) This handles the temp directory pathname conversion where needed:
+ *              /tmp  ==>  [OS specific temp directory]
+ *      (5) Files can also be sharded into sets of lines by the program 'split':
+ *              split -n l/<n> <filename>
+ *          Using 'split', the resulting files have approximately equal
+ *          numbers of bytes, rather than equal numbers of lines.
+ * </pre>
+ */
+l_ok
+fileSplitLinesUniform(const char  *filename,
+                      l_int32      n,
+                      l_int32      save_empty,
+                      const char  *rootpath,
+                      const char  *ext)
+{
+l_int32   i, totlines, nlines, index, ret;
+size_t    nbytes;
+l_uint8  *data;
+char     *str;
+char      outname[512];
+NUMA     *na;
+SARRAY   *sa;
+
+    if (!filename)
+        return ERROR_INT("filename not defined", __func__, 1);
+    if (!rootpath)
+        return ERROR_INT("rootpath not defined", __func__, 1);
+    if (n <= 0)
+        return ERROR_INT("n must be > 0", __func__, 1);
+    if (save_empty != 0 && save_empty != 1)
+        return ERROR_INT("save_empty not 0 or 1", __func__, 1);
+
+        /* Make sarray of lines; the newlines are stripped off */
+    if ((data = l_binaryRead(filename, &nbytes)) == NULL)
+        return ERROR_INT("data not read", __func__, 1);
+    sa = sarrayCreateLinesFromString((const char *)data, save_empty);
+    LEPT_FREE(data);
+    if (!sa)
+        return ERROR_INT("sa not made", __func__, 1);
+    totlines = sarrayGetCount(sa);
+    if (n > totlines) {
+        sarrayDestroy(&sa);
+        L_ERROR("num files = %d > num lines = %d\n", __func__, n, totlines);
+        return 1;
+    }
+
+        /* Find how many lines go into each of the n files */
+    if ((na = numaGetUniformBinSizes(totlines, n)) == NULL) {
+        sarrayDestroy(&sa);
+        return ERROR_INT("na not made", __func__, 1);
+    }
+
+        /* Write n sets of lines to n files, adding the newlines back.
+         * The files are numbered from 0.  A file that cannot be
+         * written does not stop the others, but it is reported in
+         * the return value. */
+    ret = 0;
+    index = 0;
+    for (i = 0; i < n; i++) {
+        if (ext == NULL)
+            snprintf(outname, sizeof(outname), "%s_%d", rootpath, i);
+        else
+            snprintf(outname, sizeof(outname), "%s_%d%s", rootpath, i, ext);
+        numaGetIValue(na, i, &nlines);
+        str = sarrayToStringRange(sa, index, nlines, 1);  /* add newlines */
+        if (!str) {  /* must not be given to strlen() */
+            L_ERROR("str not made for %s\n", __func__, outname);
+            ret = 1;
+            break;
+        }
+        if (l_binaryWrite(outname, "w", str, strlen(str)))
+            ret = 1;
+        LEPT_FREE(str);
+        index += nlines;
+    }
+    numaDestroy(&na);
+    sarrayDestroy(&sa);
+    return ret;
+}
+
+
+/*--------------------------------------------------------------------*
+ *          Multi-platform functions for opening file streams         *
+ *--------------------------------------------------------------------*/
+/*!
+ * \brief   fopenReadStream()
+ *
+ * \param[in]    filename
+ * \return  stream, or NULL on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) This should be used whenever you want to run fopen() to
+ *          read from a stream.  Never call fopen() directly.
+ *      (2) This handles the temp directory pathname conversion where needed:
+ *              /tmp  ==>  [OS specific temp directory]
+ * </pre>
+ */
+FILE *
+fopenReadStream(const char  *filename)
+{
+char  *fname, *tail;
+FILE  *fp;
+
+    if (!filename)
+        return (FILE *)ERROR_PTR("filename not defined", __func__, NULL);
+
+        /* Try input filename.  If no pathname was generated, skip
+         * this attempt, because fopen() must not be given a null
+         * pointer for the name. */
+    if ((fname = genPathname(filename, NULL)) != NULL) {
+        fp = fopen(fname, "rb");
+        LEPT_FREE(fname);
+        if (fp) return fp;
+    }
+
+        /* Else, strip directory and try locally */
+    tail = NULL;
+    splitPathAtDirectory(filename, NULL, &tail);
+    if (!tail)
+        return (FILE*)ERROR_PTR_1("tail not found", filename, __func__, NULL);
+    fp = fopen(tail, "rb");
+    if (!fp)
+        L_ERROR("failed to open locally with tail %s for filename %s\n",
+                __func__, tail, filename);
+    LEPT_FREE(tail);
+    return fp;
+}
+
+
+/*!
+ * \brief   fopenWriteStream()
+ *
+ * \param[in]    filename
+ * \param[in]    modestring
+ * \return  stream, or NULL on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) This should be used whenever you want to run fopen() to
+ *          write or append to a stream.  Never call fopen() directly.
+ *      (2) This handles the temp directory pathname conversion where needed:
+ *              /tmp  ==>  [OS specific temp directory]
+ * </pre>
+ */
+FILE *
+fopenWriteStream(const char  *filename,
+                 const char  *modestring)
+{
+char  *fname;
+FILE  *fp;
+
+    if (!filename)
+        return (FILE *)ERROR_PTR("filename not defined", __func__, NULL);
+
+    fname = genPathname(filename, NULL);
+    fp = fopen(fname, modestring);
+    if (!fp)
+        fp = (FILE *)ERROR_PTR_1("stream not opened", fname, __func__, NULL);
+    LEPT_FREE(fname);
+    return fp;
+}
+
+
+/*!
+ * \brief   fopenReadFromMemory()
+ *
+ * \param[in]    data    bytes to be made readable through a stream
+ * \param[in]    size    number of bytes in %data
+ * \return  file stream, or NULL on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) Work-around if fmemopen() not available.
+ *      (2) Windows tmpfile() writes into the root C:\ directory, which
+ *          requires admin privileges.  This also works around that.
+ *      (3) When fmemopen() is used, the stream reads directly from
+ *          %data, so %data must remain valid until the stream is closed.
+ *          In the work-around, %data is copied into a temp file and
+ *          the stream is rewound to the beginning.
+ *      (4) In the work-around, if the data cannot be completely written
+ *          to the temp file, the stream is closed and NULL is returned.
+ * </pre>
+ */
+FILE *
+fopenReadFromMemory(const l_uint8  *data,
+                    size_t          size)
+{
+FILE  *fp;
+
+    if (!data)
+        return (FILE *)ERROR_PTR("data not defined", __func__, NULL);
+
+#if HAVE_FMEMOPEN
+        /* fmemopen() takes a non-const buffer; the stream is read-only */
+    if ((fp = fmemopen((void *)data, size, "rb")) == NULL)
+        return (FILE *)ERROR_PTR("stream not opened", __func__, NULL);
+#else  /* write to tmp file */
+    L_INFO("no fmemopen API --> work-around: write to temp file\n", __func__);
+  #ifdef _WIN32
+    if ((fp = fopenWriteWinTempfile()) == NULL)
+        return (FILE *)ERROR_PTR("tmpfile stream not opened", __func__, NULL);
+  #else
+    if ((fp = tmpfile()) == NULL)
+        return (FILE *)ERROR_PTR("tmpfile stream not opened", __func__, NULL);
+  #endif  /*  _WIN32 */
+        /* Do not return a stream that holds only part of the data */
+    if (fwrite(data, 1, size, fp) != size) {
+        fclose(fp);
+        return (FILE *)ERROR_PTR("data not written to tmpfile",
+                                 __func__, NULL);
+    }
+    rewind(fp);
+#endif  /* HAVE_FMEMOPEN */
+
+    return fp;
+}
+
+
+/*--------------------------------------------------------------------*
+ *                Opening a Windows tmpfile for writing               *
+ *--------------------------------------------------------------------*/
+/*!
+ * \brief   fopenWriteWinTempfile()
+ *
+ * \return  file stream, or NULL on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) The Windows version of tmpfile() writes into the root
+ *          C:\ directory, which requires admin privileges.  This
+ *          function provides an alternative implementation.
+ *      (2) The file is opened for binary read/write ("r+b") with the
+ *          _O_TEMPORARY flag, which asks the C runtime to delete the
+ *          file when its last descriptor is closed.
+ *      (3) On platforms other than Windows this always returns NULL.
+ * </pre>
+ */
+FILE *
+fopenWriteWinTempfile(void)
+{
+#ifdef _WIN32
+l_int32  handle;
+FILE    *fp;
+char    *filename;
+
+    if ((filename = l_makeTempFilename()) == NULL) {
+        L_ERROR("l_makeTempFilename failed, %s\n", __func__, strerror(errno));
+        return NULL;
+    }
+
+    handle = _open(filename, _O_CREAT | _O_RDWR | _O_SHORT_LIVED |
+                   _O_TEMPORARY | _O_BINARY, _S_IREAD | _S_IWRITE);
+    if (handle == -1) {
+            /* Report before any other call can disturb errno */
+        L_ERROR("_open failed, %s\n", __func__, strerror(errno));
+        lept_free(filename);
+        return NULL;
+    }
+    lept_free(filename);
+
+    if ((fp = _fdopen(handle, "r+b")) == NULL) {
+        L_ERROR("_fdopen failed, %s\n", __func__, strerror(errno));
+        _close(handle);  /* the descriptor is still ours; don't leak it */
+        return NULL;
+    }
+
+    return fp;
+#else
+    return NULL;
+#endif  /*  _WIN32 */
+}
+
+
+/*--------------------------------------------------------------------*
+ *       Multi-platform functions that avoid C-runtime boundary       *
+ *             crossing for applications with Windows DLLs            *
+ *--------------------------------------------------------------------*/
+/*
+ *  Problems arise when pointers to streams and data are passed
+ *  between two Windows DLLs that have been generated with different
+ *  C runtimes.  To avoid this, leptonica provides wrappers for
+ *  several C library calls.
+ */
+/*!
+ * \brief   lept_fopen()
+ *
+ * \param[in]    filename
+ * \param[in]    mode       same as for fopen(); e.g., "rb"
+ * \return  stream or NULL on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) This must be used by any application that passes
+ *          a file handle to a leptonica Windows DLL.
+ *      (2) If stringFindSubstr() reports an "r" in %mode, the file is
+ *          opened with fopenReadStream(), which takes no mode argument.
+ *          Otherwise it is opened with fopenWriteStream(), using %mode.
+ * </pre>
+ */
+FILE *
+lept_fopen(const char  *filename,
+           const char  *mode)
+{
+    if (filename == NULL)
+        return (FILE *)ERROR_PTR("filename not defined", __func__, NULL);
+    if (mode == NULL)
+        return (FILE *)ERROR_PTR("mode not defined", __func__, NULL);
+
+        /* No "r" in the mode string: this is a write or append request */
+    if (!stringFindSubstr(mode, "r", NULL))
+        return fopenWriteStream(filename, mode);
+
+    return fopenReadStream(filename);
+}
+
+
+/*!
+ * \brief   lept_fclose()
+ *
+ * \param[in]    fp    file stream
+ * \return  0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) This should be used by any application that accepts
+ *          a file handle generated by a leptonica Windows DLL.
+ *      (2) A failure of fclose(), which returns EOF, is reported as 1
+ *          so that the return value is always 0 or 1.
+ * </pre>
+ */
+l_ok
+lept_fclose(FILE *fp)
+{
+    if (!fp)
+        return ERROR_INT("stream not defined", __func__, 1);
+
+        /* Map the fclose() result (0 or EOF) onto the l_ok convention */
+    return (fclose(fp) == 0) ? 0 : 1;
+}
+
+
+/*!
+ * \brief   lept_calloc()
+ *
+ * \param[in]    nmemb    number of members
+ * \param[in]    size     of each member
+ * \return  void ptr, or NULL on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) For safety with Windows DLLs, this can be used in conjunction
+ *          with lept_free() to avoid C-runtime boundary problems.
+ *          Just use these two functions throughout your application.
+ *      (2) If either %nmemb or %size is 0, nothing is allocated and
+ *          NULL is returned.
+ * </pre>
+ */
+void *
+lept_calloc(size_t  nmemb,
+            size_t  size)
+{
+        /* Both arguments are unsigned, so only 0 needs to be rejected */
+    if (nmemb == 0 || size == 0)
+        return NULL;
+
+    return LEPT_CALLOC(nmemb, size);
+}
+
+
+/*!
+ * \brief   lept_free()
+ *
+ * \param[in]    ptr    heap data to be released; NULL is ignored
+ *
+ * <pre>
+ * Notes:
+ *      (1) This should be used by any application that accepts
+ *          heap data allocated by a leptonica Windows DLL.
+ * </pre>
+ */
+void
+lept_free(void *ptr)
+{
+    if (ptr != NULL) {
+        LEPT_FREE(ptr);
+    }
+}
+
+
+/*--------------------------------------------------------------------*
+ *                Multi-platform file system operations               *
+ *         [ These only write to /tmp or its subdirectories ]         *
+ *--------------------------------------------------------------------*/
+/*!
+ * \brief   lept_mkdir()
+ *
+ * \param[in]    subdir    of /tmp or its OS specific equivalent
+ * \return  0 on success; otherwise the number of directories that
+ *              could not be made (or 1 for invalid input)
+ *
+ * <pre>
+ * Notes:
+ *      (1) %subdir is a partial path that can consist of one or more
+ *          directories.  It must not be empty and must not begin
+ *          with '.' or '/'.
+ *      (2) This makes any subdirectories of /tmp that are required.
+ *          A directory that already exists is not counted as a failure.
+ *      (3) The root temp directory is:
+ *            /tmp    (unix)  [default]
+ *            [Temp]  (Windows)
+ *      (4) If the global LeptDebugOK is 0, nothing is made and this
+ *          returns 0.
+ * </pre>
+ */
+l_int32
+lept_mkdir(const char  *subdir)
+{
+char     *dir, *tmpdir;
+l_int32   i, n;
+l_int32   ret = 0;
+SARRAY   *sa;
+#ifdef  _WIN32
+l_uint32  attributes;
+#else
+struct stat  statbuf;
+#endif  /* _WIN32 */
+
+    if (!LeptDebugOK) {
+            /* %subdir has not been validated yet; never print NULL as %s */
+        L_INFO("making named temp subdirectory %s is disabled\n",
+               __func__, subdir ? subdir : "(null)");
+        return 0;
+    }
+
+    if (!subdir)
+        return ERROR_INT("subdir not defined", __func__, 1);
+    if ((strlen(subdir) == 0) || (subdir[0] == '.') || (subdir[0] == '/'))
+        return ERROR_INT("subdir not an actual subdirectory", __func__, 1);
+
+    if ((dir = genPathname("/tmp", NULL)) == NULL)
+        return ERROR_INT("temp directory name not made", __func__, 1);
+    sa = sarrayCreate(0);
+    sarraySplitString(sa, subdir, "/");
+    n = sarrayGetCount(sa);
+
+       /* Make sure the tmp directory exists.  On unix, mkdir() also
+        * fails when the directory is already there, so a failure is
+        * only counted if no directory exists afterwards. */
+#ifndef _WIN32
+    if (mkdir(dir, 0777) != 0 &&
+        (stat(dir, &statbuf) != 0 || !S_ISDIR(statbuf.st_mode)))
+        ret++;
+#else
+    attributes = GetFileAttributesA(dir);
+    if (attributes == INVALID_FILE_ATTRIBUTES)
+        ret = (CreateDirectoryA(dir, NULL) ? 0 : 1);
+#endif
+
+        /* Make all the subdirectories, one path component at a time */
+    for (i = 0; i < n; i++) {
+        tmpdir = pathJoin(dir, sarrayGetString(sa, i, L_NOCOPY));
+        if (!tmpdir) {  /* e.g., a ".." component; cannot go deeper */
+            ret++;
+            break;
+        }
+#ifndef _WIN32
+        if (mkdir(tmpdir, 0777) != 0 &&
+            (stat(tmpdir, &statbuf) != 0 || !S_ISDIR(statbuf.st_mode)))
+            ret++;
+#else
+        if (CreateDirectoryA(tmpdir, NULL) == 0)
+            ret += (GetLastError() != ERROR_ALREADY_EXISTS);
+#endif
+        LEPT_FREE(dir);
+        dir = tmpdir;
+    }
+    LEPT_FREE(dir);
+    sarrayDestroy(&sa);
+    if (ret > 0)
+        L_ERROR("failure to create %d directories\n", __func__, ret);
+    return ret;
+}
+
+
+/*!
+ * \brief   lept_rmdir()
+ *
+ * \param[in]    subdir    of /tmp or its OS specific equivalent
+ * \return  0 on success, non-zero on failure
+ *
+ * <pre>
+ * Notes:
+ *      (1) %subdir is a partial path that can consist of one or more
+ *          directories.  It must not be empty and must not begin
+ *          with '.' or '/'.
+ *      (2) This removes all files from the specified subdirectory of
+ *          the root temp directory:
+ *            /tmp    (unix)
+ *            [Temp]  (Windows)
+ *          and then removes the subdirectory.
+ *      (3) The combination
+ *            lept_rmdir(subdir);
+ *            lept_mkdir(subdir);
+ *          is guaranteed to give you an empty subdirectory.
+ *      (4) If the subdirectory does not exist, this silently returns 0.
+ *      (5) Failures to remove individual files are not reported; the
+ *          return value is that of the final directory removal.
+ * </pre>
+ */
+l_int32
+lept_rmdir(const char  *subdir)
+{
+char    *tmpsub, *entry, *entrypath, *target;
+l_int32  found, status, index, count;
+SARRAY  *names;
+
+    if (!subdir)
+        return ERROR_INT("subdir not defined", __func__, 1);
+    if ((subdir[0] == '\0') || (subdir[0] == '.') || (subdir[0] == '/'))
+        return ERROR_INT("subdir not an actual subdirectory", __func__, 1);
+
+        /* Find the temp subdirectory */
+    if ((tmpsub = pathJoin("/tmp", subdir)) == NULL)
+        return ERROR_INT("directory name not made", __func__, 1);
+    lept_direxists(tmpsub, &found);
+    if (!found) {  /* fail silently */
+        LEPT_FREE(tmpsub);
+        return 0;
+    }
+
+        /* List all the files in that directory */
+    names = getFilenamesInDirectory(tmpsub);
+    if (names == NULL) {
+        L_ERROR("directory %s does not exist!\n", __func__, tmpsub);
+        LEPT_FREE(tmpsub);
+        return 1;
+    }
+
+        /* Remove them one by one; individual results are not checked */
+    count = sarrayGetCount(names);
+    index = 0;
+    while (index < count) {
+        entry = sarrayGetString(names, index, L_NOCOPY);
+        entrypath = genPathname(tmpsub, entry);
+        remove(entrypath);
+        LEPT_FREE(entrypath);
+        index++;
+    }
+
+        /* Finally remove the directory itself */
+#ifndef _WIN32
+    target = genPathname("/tmp", subdir);
+    status = rmdir(target);
+#else
+    target = genPathname(tmpsub, NULL);
+    status = (RemoveDirectoryA(target) ? 0 : 1);
+#endif  /* !_WIN32 */
+    LEPT_FREE(target);
+
+    sarrayDestroy(&names);
+    LEPT_FREE(tmpsub);
+    return status;
+}
+
+
+/*!
+ * \brief   lept_direxists()
+ *
+ * \param[in]    dir
+ * \param[out]   pexists    1 if it exists; 0 otherwise
+ * \return  void
+ *
+ * <pre>
+ * Notes:
+ *      (1) Always use unix pathname separators.
+ *      (2) By calling genPathname(), if the pathname begins with "/tmp"
+ *          this does an automatic directory translation for operating
+ *          systems that use a different path for /tmp.
+ *      (3) %pexists is set to 1 only if the path exists and is a
+ *          directory.  It is set to 0 if %dir is NULL or if the
+ *          pathname cannot be generated.
+ * </pre>
+ */
+void
+lept_direxists(const char  *dir,
+               l_int32     *pexists)
+{
+char         *realdir;
+#ifndef _WIN32
+struct stat   statbuf;
+#else
+l_uint32      attr;
+#endif  /* !_WIN32 */
+
+    if (pexists == NULL) return;
+    *pexists = 0;
+    if (dir == NULL) return;
+
+    realdir = genPathname(dir, NULL);
+    if (realdir == NULL)
+        return;
+
+#ifndef _WIN32
+    if (stat(realdir, &statbuf) != -1 && S_ISDIR(statbuf.st_mode))
+        *pexists = 1;
+#else  /* _WIN32 */
+    attr = GetFileAttributesA(realdir);
+    if (attr != INVALID_FILE_ATTRIBUTES && (attr & FILE_ATTRIBUTE_DIRECTORY))
+        *pexists = 1;
+#endif  /* _WIN32 */
+
+    LEPT_FREE(realdir);
+}
+
+
+/*!
+ * \brief   lept_rm_match()
+ *
+ * \param[in]    subdir    [optional] if NULL, the removed files are in /tmp
+ * \param[in]    substr    [optional] pattern to match in filename
+ * \return  0 on success, non-zero on failure
+ *
+ * <pre>
+ * Notes:
+ *      (1) This removes the matched files in /tmp or a subdirectory of /tmp.
+ *          Use NULL for %subdir if the files are in /tmp.
+ *      (2) If %substr == NULL, this removes all files in the directory.
+ *          If %substr == "" (empty), this removes no files.
+ *          If both %subdir == NULL and %substr == NULL, this removes
+ *          all files in /tmp.
+ *      (3) Use unix pathname separators.
+ *      (4) By calling genPathname(), if the pathname begins with "/tmp"
+ *          this does an automatic directory translation for operating
+ *          systems that use a different path for /tmp.
+ *      (5) Error conditions:
+ *            * returns -1 if the directory name cannot be made or the
+ *              directory is not found
+ *            * returns the number of files (> 0) that it was unable to remove.
+ * </pre>
+ */
+l_int32
+lept_rm_match(const char  *subdir,
+              const char  *substr)
+{
+char    *path, *fname;
+char     tempdir[256];
+l_int32  i, n, ret;
+SARRAY  *sa;
+
+        /* Never scan with a directory name that was not generated */
+    if (makeTempDirname(tempdir, sizeof(tempdir), subdir))
+        return ERROR_INT("temp dirname not made", __func__, -1);
+    if ((sa = getSortedPathnamesInDirectory(tempdir, substr, 0, 0)) == NULL)
+        return ERROR_INT("sa not made", __func__, -1);
+    n = sarrayGetCount(sa);
+    if (n == 0) {
+        L_WARNING("no matching files found\n", __func__);
+        sarrayDestroy(&sa);
+        return 0;
+    }
+
+    ret = 0;
+    for (i = 0; i < n; i++) {
+        fname = sarrayGetString(sa, i, L_NOCOPY);
+        path = genPathname(fname, NULL);
+            /* A pathname that could not be generated counts as a failure */
+        if (!path || lept_rmfile(path) != 0) {
+            L_ERROR("failed to remove %s\n", __func__, path ? path : fname);
+            ret++;
+        }
+        LEPT_FREE(path);
+    }
+    sarrayDestroy(&sa);
+    return ret;
+}
+
+
+/*!
+ * \brief   lept_rm()
+ *
+ * \param[in]    subdir    [optional] subdir of '/tmp'; can be NULL
+ * \param[in]    tail      filename without the directory
+ * \return  0 on success, non-zero on failure
+ *
+ * <pre>
+ * Notes:
+ *      (1) By calling genPathname(), this does an automatic directory
+ *          translation on operating systems which use a different path.
+ *      (2) The file is removed with lept_rmfile().
+ * </pre>
+ */
+l_int32
+lept_rm(const char  *subdir,
+        const char  *tail)
+{
+char     tempdir[256];
+char    *fullpath;
+l_int32  status;
+
+    if (tail == NULL || tail[0] == '\0')
+        return ERROR_INT("tail undefined or empty", __func__, 1);
+    if (makeTempDirname(tempdir, sizeof(tempdir), subdir) != 0)
+        return ERROR_INT("temp dirname not made", __func__, 1);
+
+    fullpath = genPathname(tempdir, tail);
+    status = lept_rmfile(fullpath);
+    LEPT_FREE(fullpath);
+    return status;
+}
+
+
+/*!
+ * \brief   lept_rmfile()
+ *
+ * \param[in]    filepath     full path to file including the directory
+ * \return  0 on success, non-zero on failure
+ *
+ * <pre>
+ * Notes:
+ *      (1) This removes the named file.
+ *      (2) Use unix pathname separators.
+ *      (3) %filepath is handed unchanged to the OS call: there is no
+ *          name translation here.  Callers that need the /tmp
+ *          translation should run the path through genPathname() first.
+ *      (4) Unlike the other lept_* functions in this section, this can remove
+ *          any file -- it is not restricted to files that are in /tmp or a
+ *          subdirectory of it.
+ *      (5) On Windows the file attributes are first reset to
+ *          FILE_ATTRIBUTE_NORMAL so that read-only files can be deleted.
+ * </pre>
+ */
+l_int32
+lept_rmfile(const char  *filepath)
+{
+    if (filepath == NULL || filepath[0] == '\0')
+        return ERROR_INT("filepath undefined or empty", __func__, 1);
+
+#ifndef _WIN32
+    return remove(filepath);
+#else
+        /* Set attributes to allow deletion of read-only files */
+    SetFileAttributesA(filepath, FILE_ATTRIBUTE_NORMAL);
+    return DeleteFileA(filepath) ? 0 : 1;
+#endif  /* !_WIN32 */
+}
+
+
+/*!
+ * \brief   lept_mv()
+ *
+ * \param[in]    srcfile
+ * \param[in]    newdir     [optional]; can be NULL
+ * \param[in]    newtail    [optional]; can be NULL
+ * \param[out]   pnewpath   [optional] of actual path; can be NULL
+ * \return  0 on success, non-zero on failure
+ *
+ * <pre>
+ * Notes:
+ *      (1) This moves %srcfile to /tmp or to a subdirectory of /tmp.
+ *      (2) %srcfile can either be a full path or relative to the
+ *          current directory.
+ *      (3) %newdir can either specify an existing subdirectory of /tmp
+ *          or can be NULL.  In the latter case, the file will be written
+ *          into /tmp.
+ *      (4) %newtail can either specify a filename tail or, if NULL
+ *          or empty, the filename is taken from src-tail, the tail
+ *          of %srcfile.
+ *      (5) For debugging, the computed newpath can be returned.  It must
+ *          be freed by the caller.
+ *      (6) Reminders:
+ *          (a) specify files using unix pathnames
+ *          (b) this does an automatic directory translation on operating
+ *              systems that use a different path for /tmp.
+ *      (7) Examples:
+ *          * newdir = NULL,    newtail = NULL    ==> /tmp/src-tail
+ *          * newdir = NULL,    newtail = abc     ==> /tmp/abc
+ *          * newdir = def/ghi, newtail = NULL    ==> /tmp/def/ghi/src-tail
+ *          * newdir = def/ghi, newtail = abc     ==> /tmp/def/ghi/abc
+ *      (8) On non-Windows systems the move is a copy followed by
+ *          removal of the source; the result of the removal is not
+ *          checked.  On Windows it is a single MoveFileExA() call.
+ * </pre>
+ */
+l_int32
+lept_mv(const char  *srcfile,
+        const char  *newdir,
+        const char  *newtail,
+        char       **pnewpath)
+{
+char        *srcdir, *srctail, *srcpath, *newpath;
+const char  *outtail;
+char         newtemp[256];
+l_int32      ret;
+
+    if (!srcfile)
+        return ERROR_INT("srcfile not defined", __func__, 1);
+
+        /* Require output pathname to be in /tmp/ or a subdirectory */
+    if (makeTempDirname(newtemp, sizeof(newtemp), newdir) == 1)
+        return ERROR_INT("newdir not NULL or a subdir of /tmp", __func__, 1);
+
+        /* Split the source; the output tail defaults to the source tail */
+    splitPathAtDirectory(srcfile, &srcdir, &srctail);
+    outtail = (newtail && newtail[0] != '\0') ? newtail : srctail;
+
+#ifndef _WIN32
+    srcpath = pathJoin(srcdir, srctail);
+    newpath = pathJoin(newtemp, outtail);
+
+        /* Overwrite any existing file at 'newpath' */
+    ret = fileCopy(srcpath, newpath);
+    if (ret == 0) {  /* and remove srcfile */
+        char *resolved = genPathname(srcpath, NULL);
+        remove(resolved);
+        LEPT_FREE(resolved);
+    }
+#else
+    srcpath = genPathname(srcdir, srctail);
+    newpath = genPathname(newtemp, outtail);
+
+        /* Overwrite any existing file at 'newpath' */
+    ret = MoveFileExA(srcpath, newpath,
+                     MOVEFILE_COPY_ALLOWED | MOVEFILE_REPLACE_EXISTING) ? 0 : 1;
+#endif  /* ! _WIN32 */
+
+        /* 'outtail' may point into 'srctail'; it is not used after this */
+    LEPT_FREE(srcdir);
+    LEPT_FREE(srctail);
+    LEPT_FREE(srcpath);
+
+        /* Hand the output path to the caller, if requested */
+    if (pnewpath)
+        *pnewpath = newpath;
+    else
+        LEPT_FREE(newpath);
+    return ret;
+}
+
+
+/*!
+ * \brief   lept_cp()
+ *
+ * \param[in]    srcfile
+ * \param[in]    newdir    [optional]; can be NULL
+ * \param[in]    newtail   [optional]; can be NULL
+ * \param[out]   pnewpath  [optional] of actual path; can be NULL
+ * \return  0 on success, non-zero on failure
+ *
+ * <pre>
+ * Notes:
+ *      (1) This copies %srcfile to /tmp or to a subdirectory of /tmp.
+ *      (2) %srcfile can either be a full path or relative to the
+ *          current directory.
+ *      (3) %newdir can either specify an existing subdirectory of /tmp,
+ *          or can be NULL.  In the latter case, the file will be written
+ *          into /tmp.
+ *      (4) %newtail can either specify a filename tail or, if NULL
+ *          or empty, the filename is taken from src-tail, the tail
+ *          of %srcfile.
+ *      (5) For debugging, the computed newpath can be returned.  It must
+ *          be freed by the caller.
+ *      (6) Reminders:
+ *          (a) specify files using unix pathnames
+ *          (b) this does an automatic directory translation for operating
+ *              systems that use a different path for /tmp
+ *      (7) Examples:
+ *          * newdir = NULL,    newtail = NULL    ==> /tmp/src-tail
+ *          * newdir = NULL,    newtail = abc     ==> /tmp/abc
+ *          * newdir = def/ghi, newtail = NULL    ==> /tmp/def/ghi/src-tail
+ *          * newdir = def/ghi, newtail = abc     ==> /tmp/def/ghi/abc
+ * </pre>
+ */
+l_int32
+lept_cp(const char  *srcfile,
+        const char  *newdir,
+        const char  *newtail,
+        char       **pnewpath)
+{
+char        *srcdir, *srctail, *srcpath, *newpath;
+const char  *outtail;
+char         newtemp[256];
+l_int32      ret;
+
+    if (!srcfile)
+        return ERROR_INT("srcfile not defined", __func__, 1);
+
+        /* Require output pathname to be in /tmp or a subdirectory */
+    if (makeTempDirname(newtemp, sizeof(newtemp), newdir) == 1)
+        return ERROR_INT("newdir not NULL or a subdir of /tmp", __func__, 1);
+
+        /* Split the source; the output tail defaults to the source tail */
+    splitPathAtDirectory(srcfile, &srcdir, &srctail);
+    outtail = (newtail && newtail[0] != '\0') ? newtail : srctail;
+
+        /* Build both pathnames, then overwrite any existing file
+         * at 'newpath' */
+#ifndef _WIN32
+    srcpath = pathJoin(srcdir, srctail);
+    newpath = pathJoin(newtemp, outtail);
+    ret = fileCopy(srcpath, newpath);
+#else
+    srcpath = genPathname(srcdir, srctail);
+    newpath = genPathname(newtemp, outtail);
+    ret = CopyFileA(srcpath, newpath, FALSE) ? 0 : 1;
+#endif   /* !_WIN32 */
+
+        /* 'outtail' may point into 'srctail'; it is not used after this */
+    LEPT_FREE(srcdir);
+    LEPT_FREE(srctail);
+    LEPT_FREE(srcpath);
+
+        /* Hand the output path to the caller, if requested */
+    if (pnewpath)
+        *pnewpath = newpath;
+    else
+        LEPT_FREE(newpath);
+    return ret;
+}
+
+
+/*--------------------------------------------------------------------*
+ *          Special debug/test function for calling 'system'          *
+ *--------------------------------------------------------------------*/
+#if defined(__APPLE__)
+  #include "TargetConditionals.h"
+#endif  /* __APPLE__ */
+
+/*!
+ * \brief   callSystemDebug()
+ *
+ * \param[in]    cmd      command to be exec'd
+ * \return  0 on success; otherwise the non-zero value from system(),
+ *              or 1 if the call was not made
+ *
+ * <pre>
+ * Notes:
+ *      (1) The C library 'system' call is only made through this function.
+ *          It only works in debug/test mode, where the global variable
+ *          LeptDebugOK == TRUE.  This variable is set to FALSE in the
+ *          library as distributed, and calling this function will
+ *          generate an error message.
+ *      (2) On Apple targets where system() is not compiled in (e.g., iOS),
+ *          no command is run and 1 is returned.
+ * </pre>
+ */
+l_int32
+callSystemDebug(const char *cmd)
+{
+l_int32  ret = 1;  /* stays at 1 if system() is never called below */
+
+    if (!cmd) {
+        L_ERROR("cmd not defined\n", __func__);
+        return 1;
+    }
+    if (LeptDebugOK == FALSE) {
+        L_INFO("'system' calls are disabled\n", __func__);
+        return 1;
+    }
+
+#if defined(__APPLE__)  /* iOS 11 does not support system() */
+
+  #if (defined(TARGET_OS_OSX) && TARGET_OS_OSX == 1)  /* Mac OS X */
+    ret = system(cmd);
+  #elif TARGET_OS_IPHONE || defined(OS_IOS)  /* iOS */
+    L_ERROR("iOS 11 does not support system()\n", __func__);
+  #endif  /* TARGET_OS_OSX */
+
+#else /* ! __APPLE__ */
+
+    ret = system(cmd);
+
+#endif /* __APPLE__ */
+
+    return ret;
+}
+
+
+/*--------------------------------------------------------------------*
+ *                     General file name operations                   *
+ *--------------------------------------------------------------------*/
+/*!
+ * \brief   splitPathAtDirectory()
+ *
+ * \param[in]    pathname  full path; can be a directory
+ * \param[out]   pdir      [optional] root directory name of
+ *                         input path, including trailing '/'
+ * \param[out]   ptail     [optional] path tail, which is either
+ *                         the file name within the root directory or
+ *                         the last sub-directory in the path
+ * \return  0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) If you only want the tail, input null for the root directory ptr.
+ *      (2) If you only want the root directory name, input null for the
+ *          tail ptr.
+ *      (3) This function makes decisions based only on the lexical
+ *          structure of the input.  Examples:
+ *            /usr/tmp/abc.d  -->  dir: /usr/tmp/       tail: abc.d
+ *            /usr/tmp/       -->  dir: /usr/tmp/       tail: [empty string]
+ *            /usr/tmp        -->  dir: /usr/           tail: tmp
+ *            abc.d           -->  dir: [empty string]  tail: abc.d
+ *      (4) Consider the first example above: /usr/tmp/abc.d.
+ *          Suppose you want the stem of the file, abc, without either
+ *          the directory or the extension.  This can be extracted in two steps:
+ *              splitPathAtDirectory("/usr/tmp/abc.d", NULL, &tail);
+ *                   [sets tail: "abc.d"]
+ *              splitPathAtExtension(tail, &basename, NULL);
+ *                   [sets basename: "abc"]
+ *      (5) The input can have either forward (unix) or backward (win)
+ *          slash separators.  The output has unix separators.
+ *          Note that Win32 pathname functions generally accept both
+ *          slash forms, but the Windows command line interpreter
+ *          only accepts backward slashes, because forward slashes are
+ *          used to demarcate switches (vs. dashes in unix).
+ *      (6) The returned strings are newly allocated and must be freed
+ *          by the caller.  On error, any requested output is set to NULL.
+ * </pre>
+ */
+l_ok
+splitPathAtDirectory(const char  *pathname,
+                     char       **pdir,
+                     char       **ptail)
+{
+char  *cpathname, *lastslash;
+
+    if (!pdir && !ptail)
+        return ERROR_INT("null input for both strings", __func__, 1);
+    if (pdir) *pdir = NULL;
+    if (ptail) *ptail = NULL;
+    if (!pathname)
+        return ERROR_INT("pathname not defined", __func__, 1);
+
+        /* Work on a private copy; it is modified in place below */
+    if ((cpathname = stringNew(pathname)) == NULL)
+        return ERROR_INT("cpathname not made", __func__, 1);
+    convertSepCharsInPath(cpathname, UNIX_PATH_SEPCHAR);
+    lastslash = strrchr(cpathname, '/');
+    if (lastslash) {
+        if (ptail)
+            *ptail = stringNew(lastslash + 1);
+        if (pdir) {
+                /* Truncate just after the last slash; the copy
+                 * itself becomes the returned directory string */
+            *(lastslash + 1) = '\0';
+            *pdir = cpathname;
+        } else {
+            LEPT_FREE(cpathname);
+        }
+    } else {  /* no directory */
+        if (pdir)
+            *pdir = stringNew("");
+        if (ptail)
+            *ptail = cpathname;  /* the whole copy is the tail */
+        else
+            LEPT_FREE(cpathname);
+    }
+
+    return 0;
+}
+
+
+/*!
+ * \brief   splitPathAtExtension()
+ *
+ * \param[in]    pathname    full path; can be a directory
+ * \param[out]   pbasename   [optional] pathname not including the
+ *                           last dot and characters after that
+ * \param[out]   pextension  [optional] path extension, which is
+ *                           the last dot and the characters after it.  If
+ *                           there is no extension, it returns the empty string
+ * \return  0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) If you only want the extension, input null for the basename ptr.
+ *      (2) If you only want the basename without extension, input null
+ *          for the extension ptr.
+ *      (3) This function makes decisions based only on the lexical
+ *          structure of the input.  Examples:
+ *            /usr/tmp/abc.jpg  -->  basename: /usr/tmp/abc    ext: .jpg
+ *            /usr/tmp/.jpg     -->  basename: /usr/tmp/       ext: .jpg
+ *            /usr/tmp.jpg/     -->  basename: /usr/tmp.jpg/   ext: [empty str]
+ *            ./.jpg            -->  basename: ./              ext: .jpg
+ *      (4) The input can have either forward (unix) or backward (win)
+ *          slash separators.  The output has unix separators, whether
+ *          or not an extension is found.
+ *      (5) Note that basename, as used here, is different from the result
+ *          of the unix program 'basename'.  Here, basename is the entire
+ *          pathname up to a final extension and its preceding dot.
+ *      (6) The returned strings are newly allocated and must be freed
+ *          by the caller.
+ * </pre>
+ */
+l_ok
+splitPathAtExtension(const char  *pathname,
+                     char       **pbasename,
+                     char       **pextension)
+{
+char  *tail, *dir, *lastdot;
+
+    if (!pbasename && !pextension)
+        return ERROR_INT("null input for both strings", __func__, 1);
+    if (pbasename) *pbasename = NULL;
+    if (pextension) *pextension = NULL;
+    if (!pathname)
+        return ERROR_INT("pathname not defined", __func__, 1);
+
+        /* Split out the directory first.  Both parts are needed
+         * below, so give up if either one was not made. */
+    splitPathAtDirectory(pathname, &dir, &tail);
+    if (!dir || !tail) {
+        if (dir) LEPT_FREE(dir);
+        if (tail) LEPT_FREE(tail);
+        return ERROR_INT("pathname not split", __func__, 1);
+    }
+
+        /* Then look for a "." in the tail part.
+         * This way we ignore all "." in the directory. */
+    if ((lastdot = strrchr(tail, '.')) != NULL) {
+        if (pextension)
+            *pextension = stringNew(lastdot);
+        *lastdot = '\0';  /* cut the extension off the tail */
+    } else {
+        if (pextension)
+            *pextension = stringNew("");
+    }
+
+        /* Rebuild from the split parts rather than copying %pathname,
+         * so that the separators are in unix form in every case */
+    if (pbasename)
+        *pbasename = stringJoin(dir, tail);
+
+    LEPT_FREE(dir);
+    LEPT_FREE(tail);
+    return 0;
+}
+
+
+/*!
+ * \brief   pathJoin()
+ *
+ * \param[in]    dir     [optional] can be null
+ * \param[in]    fname   [optional] can be null
+ * \return  specially concatenated path, or NULL on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) Use unix-style pathname separators ('/').
+ *      (2) %fname can be the entire path, or part of the path containing
+ *          at least one directory, or a tail without a directory, or NULL.
+ *      (3) It produces a path that strips multiple slashes to a single
+ *          slash, joins %dir and %fname by a slash, and has no trailing
+ *          slashes (except in the cases where %dir == "/" and
+ *          %fname == NULL, or v.v.).
+ *      (4) If both %dir and %fname are null, produces an empty string.
+ *      (5) Neither %dir nor %fname can begin with '..'.
+ *      (6) The result is not canonicalized or tested for correctness:
+ *          garbage in (e.g., /&%), garbage out.
+ *      (7) A leading slash in the result comes from %dir when %dir is
+ *          a non-empty string; otherwise it comes from %fname.
+ *      (8) Examples:
+ *             //tmp// + //abc/  -->  /tmp/abc
+ *             tmp/ + /abc/      -->  tmp/abc
+ *             tmp/ + abc/       -->  tmp/abc
+ *             /tmp/ + ///       -->  /tmp
+ *             /tmp/ + NULL      -->  /tmp
+ *             // + /abc//       -->  /abc
+ *             // + NULL         -->  /
+ *             NULL + /abc/def/  -->  /abc/def
+ *             NULL + abc//      -->  abc
+ *             NULL + //         -->  /
+ *             NULL + NULL       -->  (empty string)
+ *             "" + ""           -->  (empty string)
+ *             "" + /            -->  /
+ *             ".." + /etc/foo   -->  NULL
+ *             /tmp + ".."       -->  NULL
+ * </pre>
+ */
+char *
+pathJoin(const char  *dir,
+         const char  *fname)
+{
+const char  *sep = "/";
+char        *part, *result;
+l_int32      i, count, havedir, havefname, leadslash;
+size_t       nbytes;
+SARRAY      *sadir, *safname;
+L_BYTEA     *buf;
+
+    if (!dir && !fname)
+        return stringNew("");
+    if (dir && dir[0] == '.' && dir[1] == '.')
+        return (char *)ERROR_PTR("dir starts with '..'", __func__, NULL);
+    if (fname && fname[0] == '.' && fname[1] == '.')
+        return (char *)ERROR_PTR("fname starts with '..'", __func__, NULL);
+
+    havedir = (dir != NULL && dir[0] != '\0');
+    havefname = (fname != NULL && fname[0] != '\0');
+
+    sadir = sarrayCreate(0);
+    safname = sarrayCreate(0);
+    buf = l_byteaCreate(4);
+
+        /* The leading slash is decided by %dir if it is non-empty,
+         * and by %fname if %dir is NULL or the empty string */
+    if (havedir)
+        leadslash = (dir[0] == '/');
+    else
+        leadslash = (havefname && fname[0] == '/');
+    if (leadslash)
+        l_byteaAppendString(buf, sep);
+
+        /* Append each component of %dir, followed by a slash */
+    if (havedir) {
+        sarraySplitString(sadir, dir, "/");  /* removes all slashes */
+        count = sarrayGetCount(sadir);
+        for (i = 0; i < count; i++) {
+            part = sarrayGetString(sadir, i, L_NOCOPY);
+            l_byteaAppendString(buf, part);
+            l_byteaAppendString(buf, sep);
+        }
+    }
+
+        /* Likewise for each component of %fname */
+    if (havefname) {
+        sarraySplitString(safname, fname, "/");
+        count = sarrayGetCount(safname);
+        for (i = 0; i < count; i++) {
+            part = sarrayGetString(safname, i, L_NOCOPY);
+            l_byteaAppendString(buf, part);
+            l_byteaAppendString(buf, sep);
+        }
+    }
+
+        /* Remove trailing slash, but keep a lone "/" */
+    result = (char *)l_byteaCopyData(buf, &nbytes);
+    if (nbytes > 1 && result[nbytes - 1] == '/')
+        result[nbytes - 1] = '\0';
+
+    sarrayDestroy(&sadir);
+    sarrayDestroy(&safname);
+    l_byteaDestroy(&buf);
+    return result;
+}
+
+
+/*!
+ * \brief   appendSubdirs()
+ *
+ * \param[in]    basedir   base directory; must not be NULL
+ * \param[in]    subdirs   subdirectory path to append; must not be NULL
+ * \return  concatenated full directory path without trailing slash,
+ *              or NULL on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) Use unix pathname separators
+ *      (2) Allocates a new string:  [basedir]/[subdirs]
+ *          The string is allocated with LEPT_CALLOC and is owned by
+ *          the caller.
+ *      (3) For a non-empty %basedir, a '/' is added after it if it does
+ *          not already end with one, and one leading '/' of %subdirs
+ *          is dropped.
+ *      (4) If %basedir is the empty string, there is nothing to join:
+ *          no separator is inserted and %subdirs is used as given.
+ *      (5) One trailing '/' is stripped from the result, except when
+ *          the result is the single character "/".
+ * </pre>
+ */
+char *
+appendSubdirs(const char  *basedir,
+              const char  *subdirs)
+{
+char        *newdir;
+const char  *tail;
+size_t       len1, len2, len3, len4;
+
+    if (!basedir || !subdirs)
+        return (char *)ERROR_PTR("basedir and subdirs not both defined",
+                                 __func__, NULL);
+
+    len1 = strlen(basedir);
+    len2 = strlen(subdirs);
+    len3 = len1 + len2 + 8;  /* room for the separator and the nul byte */
+    if ((newdir = (char *)LEPT_CALLOC(len3, 1)) == NULL)
+        return (char *)ERROR_PTR("newdir not made", __func__, NULL);
+    stringCat(newdir, len3, basedir);
+
+        /* Only look at the last character of %basedir when there is
+         * one; indexing with len1 - 1 on an empty string would wrap
+         * around and read outside the buffer. */
+    tail = subdirs;
+    if (len1 > 0) {
+        if (newdir[len1 - 1] != '/')  /* add '/' if necessary */
+            newdir[len1] = '/';  /* buffer is zeroed, so still terminated */
+        if (tail[0] == '/')  /* strip one leading '/' from subdirs */
+            tail++;
+    }
+    stringCat(newdir, len3, tail);
+
+        /* Strip the trailing '/', but leave a bare "/" alone and never
+         * index an empty result. */
+    len4 = strlen(newdir);
+    if (len4 > 1 && newdir[len4 - 1] == '/')
+        newdir[len4 - 1] = '\0';
+
+    return newdir;
+}
+
+
+/*--------------------------------------------------------------------*
+ *                     Special file name operations                   *
+ *--------------------------------------------------------------------*/
+/*!
+ * \brief   convertSepCharsInPath()
+ *
+ * \param[in,out]  path    nul-terminated path, modified in place
+ * \param[in]      type    UNIX_PATH_SEPCHAR, WIN_PATH_SEPCHAR
+ * \return  0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) In-place conversion.
+ *      (2) Type is the resulting type:
+ *            * UNIX_PATH_SEPCHAR:  '\\' ==> '/'
+ *            * WIN_PATH_SEPCHAR:   '/' ==> '\\'
+ *      (3) Virtually all path operations in leptonica use unix separators.
+ *      (4) The backslash is a valid character in unix pathnames and should
+ *          not be converted.  Each backslash needs to be escaped with a
+ *          preceding backslash for the shell, but the actual filename
+ *          does not include these escape characters.
+ *          For this reason the conversion to UNIX_PATH_SEPCHAR is only
+ *          compiled in when _WIN32 is defined; elsewhere it leaves
+ *          %path unchanged and returns 0.
+ * </pre>
+ */
+l_ok
+convertSepCharsInPath(char    *path,
+                      l_int32  type)
+{
+size_t  i, len;  /* index has the same unsigned type as the length */
+
+    if (!path)
+        return ERROR_INT("path not defined", __func__, 1);
+    if (type != UNIX_PATH_SEPCHAR && type != WIN_PATH_SEPCHAR)
+        return ERROR_INT("invalid type", __func__, 1);
+
+    len = strlen(path);
+    if (type == UNIX_PATH_SEPCHAR) {
+#ifdef _WIN32  /* only convert on Windows */
+        for (i = 0; i < len; i++) {
+            if (path[i] == '\\')
+                path[i] = '/';
+        }
+#endif  /* _WIN32 */
+    } else {  /* WIN_PATH_SEPCHAR */
+        for (i = 0; i < len; i++) {
+            if (path[i] == '/')
+                path[i] = '\\';
+        }
+    }
+    return 0;
+}
+
+
+/*!
+ * \brief   genPathname()
+ *
+ * \param[in]    dir     [optional] directory or full path name,
+ *                       with or without the trailing '/'
+ * \param[in]    fname   [optional] file name within a directory
+ * \return  pathname either a directory or full path, or NULL on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) This function generates actual paths in the following ways:
+ *            * from two sub-parts (e.g., a directory and a file name).
+ *            * from a single full path, placed in %dir, with
+ *              %fname == NULL.
+ *            * from the name of a file in the local directory placed in
+ *              %fname, with %dir == NULL.
+ *            * if in a "/tmp" directory and on iOS, macOS or Windows,
+ *              the OS specific temp directory is used.
+ *      (2) This does an automatic directory translation for operating
+ *          systems that use a different path for /tmp.
+ *          That path is determined
+ *             * on Windows: by GetTempPath()
+ *             * on macOS, iOS: by confstr() (see man page)
+ *          If the OS call fails, or if rewriting is enabled on a
+ *          platform that has no OS-specific lookup, the directory is
+ *          used as given.
+ *      (3) On unix, the TMPDIR variable is ignored.  No rewriting
+ *          of temp directories is permitted.
+ *      (4) There are four cases for the input:
+ *          (a) %dir is a directory and %fname is defined: result is a
+ *              full path
+ *          (b) %dir is a directory and %fname is null: result is a directory
+ *          (c) %dir is a full path and %fname is null: result is a full path
+ *          (d) %dir is null or an empty string: start in the current dir;
+ *              result is a full path
+ *      (5) In all cases, the resulting pathname is not terminated with a slash
+ *      (6) When %fname is appended, a '/' separator is inserted only if
+ *          the directory part does not already end with one, so that
+ *          a directory of "/" gives "/fname" rather than "//fname".
+ *      (7) The caller is responsible for freeing the returned pathname.
+ * </pre>
+ */
+char *
+genPathname(const char  *dir,
+            const char  *fname)
+{
+#if defined(REWRITE_TMP)
+l_int32  rewrite_tmp = TRUE;
+#else
+l_int32  rewrite_tmp = FALSE;
+#endif  /* REWRITE_TMP */
+char    *cdir, *pathout;
+l_int32  dirlen, namelen;
+size_t   size;
+
+    if (!dir && !fname)
+        return (char *)ERROR_PTR("no input", __func__, NULL);
+
+        /* Handle the case where we start from the current directory */
+    if (!dir || dir[0] == '\0') {
+        if ((cdir = getcwd(NULL, 0)) == NULL)
+            return (char *)ERROR_PTR("no current dir found", __func__, NULL);
+    } else {
+        if ((cdir = stringNew(dir)) == NULL)
+            return (char *)ERROR_PTR("stringNew failed", __func__, NULL);
+    }
+
+        /* Convert to unix path separators, and remove the trailing
+         * slash in the directory, except when dir == "/"  */
+    convertSepCharsInPath(cdir, UNIX_PATH_SEPCHAR);
+    dirlen = strlen(cdir);
+    if (cdir[dirlen - 1] == '/' && dirlen != 1) {
+        cdir[dirlen - 1] = '\0';
+        dirlen--;
+    }
+
+    namelen = (fname) ? strlen(fname) : 0;
+    size = dirlen + namelen + 256;
+    if ((pathout = (char *)LEPT_CALLOC(size, sizeof(char))) == NULL) {
+        LEPT_FREE(cdir);
+        return (char *)ERROR_PTR("pathout not made", __func__, NULL);
+    }
+
+        /* First handle %dir (which may be a full pathname).
+         * There is no path rewriting on unix, and on win32, we do not
+         * rewrite unless the specified directory is /tmp or
+         * a subdirectory of /tmp */
+    if (!rewrite_tmp || dirlen < 4 ||
+        (dirlen == 4 && strncmp(cdir, "/tmp", 4) != 0) ||  /* not in "/tmp" */
+        (dirlen > 4 && strncmp(cdir, "/tmp/", 5) != 0)) {  /* not in "/tmp/" */
+        stringCopy(pathout, cdir, dirlen);
+    } else {  /* Rewrite with "/tmp" specified for the directory. */
+#if defined(__APPLE__)
+        size_t n = confstr(_CS_DARWIN_USER_TEMP_DIR, pathout, size);
+        if (n == 0 || n > size) {
+            /* Fall back to using /tmp */
+            stringCopy(pathout, cdir, dirlen);
+        } else {
+            /* Add the rest of cdir */
+            if (dirlen > 4)
+                stringCat(pathout, size, cdir + 4);
+        }
+#elif defined(_WIN32)
+        l_int32 tmpdirlen;
+        char    tmpdir[MAX_PATH];
+        DWORD   nret;
+
+            /* GetTempPathA returns 0 on failure, and the required
+             * buffer size when the buffer is too small.  In both cases
+             * the buffer contents must not be used. */
+        nret = GetTempPathA(sizeof(tmpdir), tmpdir);
+        if (nret == 0 || nret >= sizeof(tmpdir)) {
+            /* Fall back to using /tmp */
+            stringCopy(pathout, cdir, dirlen);
+        } else {
+            tmpdirlen = strlen(tmpdir);
+            if (tmpdirlen > 0 && tmpdir[tmpdirlen - 1] == '\\') {
+                tmpdir[tmpdirlen - 1] = '\0';  /* trim the trailing '\' */
+                tmpdirlen--;
+            }
+            stringCopy(pathout, tmpdir, tmpdirlen);
+
+                /* Add the rest of cdir */
+            if (dirlen > 4)
+                stringCat(pathout, size, cdir + 4);
+        }
+#else
+            /* Rewriting was requested but this platform has no
+             * OS-specific temp directory lookup: keep the directory
+             * as given rather than dropping it. */
+        stringCopy(pathout, cdir, dirlen);
+#endif  /* __APPLE__, _WIN32 */
+    }
+
+        /* Now handle %fname.  Add the separator only when the directory
+         * part does not already end in one. */
+    if (fname && strlen(fname) > 0) {
+        dirlen = strlen(pathout);
+        if (dirlen == 0 || pathout[dirlen - 1] != '/')
+            pathout[dirlen] = '/';
+        stringCat(pathout, size, fname);
+    }
+
+    LEPT_FREE(cdir);
+    return pathout;
+}
+
+
+/*!
+ * \brief   makeTempDirname()
+ *
+ * \param[in]    result    preallocated on stack or heap and passed in
+ * \param[in]    nbytes    size of %result array, in bytes
+ * \param[in]    subdir    [optional]; can be NULL or an empty string
+ * \return  0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) This generates the directory path for output temp files,
+ *          written into %result with unix separators.
+ *      (2) Caller allocates %result, large enough to hold the path,
+ *          which is:
+ *            /tmp/%subdir       (unix)
+ *            [Temp]/%subdir     (Windows, macOS, iOS)
+ *          where [Temp] is the OS path
+ *          and %subdir is in general a set of nested subdirectories:
+ *            dir1/dir2/.../dirN
+ *          which in use would not typically exceed 2 levels.
+ *      (3) %subdir may not begin with '.' or '/'.
+ *      (4) %result is zeroed before use.  The path is written only if
+ *          its length is at most %nbytes - 2; otherwise %result is
+ *          left zeroed and 1 is returned.
+ *      (5) Usage example:
+ * \code
+ *           char  result[256];
+ *           makeTempDirname(result, sizeof(result), "lept/golden");
+ * \endcode
+ * </pre>
+ */
+l_ok
+makeTempDirname(char        *result,
+                size_t       nbytes,
+                const char  *subdir)
+{
+char    *dir, *path;
+l_int32  ret = 0;
+size_t   pathlen;
+
+    if (!result)
+        return ERROR_INT("result not defined", __func__, 1);
+    if (subdir && ((subdir[0] == '.') || (subdir[0] == '/')))
+        return ERROR_INT("subdir not an actual subdirectory", __func__, 1);
+
+    memset(result, 0, nbytes);
+
+    if ((dir = pathJoin("/tmp", subdir)) == NULL)
+        return ERROR_INT("dir not made", __func__, 1);
+
+#if defined(REWRITE_TMP)
+    path = genPathname(dir, NULL);
+#else
+    path = stringNew(dir);
+#endif  /* REWRITE_TMP */
+    if (!path) {
+        LEPT_FREE(dir);
+        return ERROR_INT("path not made", __func__, 1);
+    }
+
+        /* Written as pathlen + 1 < nbytes, not pathlen < nbytes - 1,
+         * so that nbytes == 0 cannot wrap around to a huge value. */
+    pathlen = strlen(path);
+    if (pathlen + 1 < nbytes) {
+            /* %result is already zeroed, so copying just the
+             * characters of %path leaves it nul-terminated. */
+        stringCopy(result, path, (l_int32)pathlen);
+    } else {
+        L_ERROR("result array too small for path\n", __func__);
+        ret = 1;
+    }
+
+    LEPT_FREE(dir);
+    LEPT_FREE(path);
+    return ret;
+}
+
+
+/*!
+ * \brief   modifyTrailingSlash()
+ *
+ * \param[in,out]  path     preallocated on stack or heap and passed in;
+ *                          nul-terminated, modified in place
+ * \param[in]      nbytes   size of %path array, in bytes
+ * \param[in]      flag     L_ADD_TRAIL_SLASH or L_REMOVE_TRAIL_SLASH
+ * \return  0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ *      (1) This carries out the requested action if necessary.
+ *      (2) A slash is added only if the string length plus 2 is less
+ *          than %nbytes; otherwise %path is left unchanged and 0 is
+ *          still returned.
+ *      (3) An empty %path has no last character and is left unchanged
+ *          for either flag.
+ * </pre>
+ */
+l_ok
+modifyTrailingSlash(char    *path,
+                    size_t   nbytes,
+                    l_int32  flag)
+{
+char    lastchar;
+size_t  len;
+
+    if (!path)
+        return ERROR_INT("path not defined", __func__, 1);
+    if (flag != L_ADD_TRAIL_SLASH && flag != L_REMOVE_TRAIL_SLASH)
+        return ERROR_INT("invalid flag", __func__, 1);
+
+        /* With an empty string, len - 1 would wrap around and index
+         * outside the array, so stop before looking at path[len - 1]. */
+    len = strlen(path);
+    if (len == 0)
+        return 0;
+
+    lastchar = path[len - 1];
+        /* len + 2 < nbytes is the same test as len < nbytes - 2 but
+         * cannot underflow when nbytes is 0 or 1 */
+    if (flag == L_ADD_TRAIL_SLASH && lastchar != '/' && len + 2 < nbytes) {
+        path[len] = '/';
+        path[len + 1] = '\0';
+    } else if (flag == L_REMOVE_TRAIL_SLASH && lastchar == '/') {
+        path[len - 1] = '\0';
+    }
+    return 0;
+}
+
+
+/*!
+ * \brief   l_makeTempFilename()
+ *
+ * \return  fname : heap allocated filename; returns NULL on failure.
+ *
+ * <pre>
+ * Notes:
+ *      (1) On unix, this makes a filename of the form
+ *               "/tmp/lept.XXXXXX",
+ *          where each X is a random character.
+ *      (2) On Windows, this makes a filename of the form
+ *               "/[Temp]/lp.XXXXXX".
+ *      (3) On all systems, this fails if the file is not writable.
+ *      (4) Safest usage is to write to a subdirectory in debug code.
+ *      (5) The returned filename must be freed by the caller, using lept_free.
+ *      (6) The tail of the filename has a '.', so that cygwin interprets
+ *          the file as having an extension.  Otherwise, cygwin assumes it
+ *          is an executable and appends ".exe" to the filename.
+ *      (7) On unix, whenever possible use tmpfile() instead.  tmpfile()
+ *          hides the file name, returns a stream opened for write,
+ *          and deletes the temp file when the stream is closed.
+ *      (8) The file descriptor or stream that is opened here is closed
+ *          again before returning; only the name is handed back.
+ * </pre>
+ */
+char *
+l_makeTempFilename(void)
+{
+char  dirname[240];
+
+    if (makeTempDirname(dirname, sizeof(dirname), NULL) == 1)
+        return (char *)ERROR_PTR("failed to make dirname", __func__, NULL);
+
+#ifndef _WIN32
+{
+    char    *pattern;
+    l_int32  fd;
+
+        /* mkstemp() rewrites the template in place, so it must be
+         * given a valid, writable string. */
+    pattern = stringConcatNew(dirname, "/lept.XXXXXX", NULL);
+    if (!pattern)
+        return (char *)ERROR_PTR("pattern not made", __func__, NULL);
+    fd = mkstemp(pattern);
+    if (fd == -1) {
+        LEPT_FREE(pattern);
+        return (char *)ERROR_PTR("mkstemp failed", __func__, NULL);
+    }
+    close(fd);
+    return pattern;
+}
+#else
+{
+    char  fname[MAX_PATH];
+    FILE *fp;
+    if (GetTempFileNameA(dirname, "lp.", 0, fname) == 0)
+        return (char *)ERROR_PTR("GetTempFileName failed", __func__, NULL);
+        /* Confirm that the generated name can be opened for writing */
+    if ((fp = fopen(fname, "wb")) == NULL)
+        return (char *)ERROR_PTR("file cannot be written to", __func__, NULL);
+    fclose(fp);
+    return stringNew(fname);
+}
+#endif  /*  ~ _WIN32 */
+}
+
+
+/*!
+ * \brief   extractNumberFromFilename()
+ *
+ * \param[in]    fname
+ * \param[in]    numpre    number of characters before the digits to be found;
+ *                         must be >= 0
+ * \param[in]    numpost   number of characters after the digits to be found;
+ *                         must be >= 0
+ * \return  num number embedded in the filename; -1 on error or if
+ *                   not found
+ *
+ * <pre>
+ * Notes:
+ *      (1) The number is to be found in the basename, which is the
+ *          filename without either the directory or the last extension.
+ *      (2) When a number is found, it is non-negative.  If no number
+ *          is found, this returns -1, without an error message.  The
+ *          caller needs to check.
+ *      (3) It is an error if %numpre + %numpost is not smaller than
+ *          the length of the basename, because no characters would
+ *          then remain to hold the number.
+ * </pre>
+ */
+l_int32
+extractNumberFromFilename(const char  *fname,
+                          l_int32      numpre,
+                          l_int32      numpost)
+{
+char    *tail = NULL;
+char    *basename = NULL;
+l_int32  len, nret, num;
+
+    if (!fname)
+        return ERROR_INT("fname not defined", __func__, -1);
+        /* Negative counts would index outside the basename below */
+    if (numpre < 0 || numpost < 0)
+        return ERROR_INT("numpre and numpost must be >= 0", __func__, -1);
+
+    splitPathAtDirectory(fname, NULL, &tail);
+    if (!tail)
+        return ERROR_INT("tail not made", __func__, -1);
+    splitPathAtExtension(tail, &basename, NULL);
+    LEPT_FREE(tail);
+    if (!basename)
+        return ERROR_INT("basename not made", __func__, -1);
+
+        /* Same condition as numpre + numpost > len - 1, arranged so
+         * that the sum of two large values cannot overflow */
+    len = strlen(basename);
+    if (numpre > len - 1 || numpost > len - 1 - numpre) {
+        LEPT_FREE(basename);
+        return ERROR_INT("numpre + numpost too big", __func__, -1);
+    }
+
+        /* Cut off the trailing %numpost characters, then parse the
+         * integer that starts after the leading %numpre characters */
+    basename[len - numpost] = '\0';
+    nret = sscanf(basename + numpre, "%d", &num);
+    LEPT_FREE(basename);
+
+    if (nret == 1)
+        return num;
+    else
+        return -1;  /* not found */
+}
author	Franz Glasner <fzglas.hg@dom66.de>
date	Mon, 15 Sep 2025 11:43:07 +0200
parents
children