Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/leptonica/src/utils2.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /*====================================================================* | |
| 2 - Copyright (C) 2001 Leptonica. All rights reserved. | |
| 3 - | |
| 4 - Redistribution and use in source and binary forms, with or without | |
| 5 - modification, are permitted provided that the following conditions | |
| 6 - are met: | |
| 7 - 1. Redistributions of source code must retain the above copyright | |
| 8 - notice, this list of conditions and the following disclaimer. | |
| 9 - 2. Redistributions in binary form must reproduce the above | |
| 10 - copyright notice, this list of conditions and the following | |
| 11 - disclaimer in the documentation and/or other materials | |
| 12 - provided with the distribution. | |
| 13 - | |
| 14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
| 15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
| 16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
| 17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY | |
| 18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
| 19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
| 20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
| 21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | |
| 22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | |
| 23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
| 24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 25 *====================================================================*/ | |
| 26 | |
| 27 /*! | |
| 28 * \file utils2.c | |
| 29 * <pre> | |
| 30 * | |
| 31 * ------------------------------------------ | |
| 32 * This file has these utilities: | |
| 33 * - safe string operations | |
| 34 * - find/replace operations on strings | |
| 35 * - read/write between file and memory | |
| 36 * - multi-platform file and directory operations | |
| 37 * - file name operations | |
| 38 * ------------------------------------------ | |
| 39 * | |
| 40 * Safe string procs | |
| 41 * char *stringNew() | |
| 42 * l_int32 stringCopy() | |
| 43 * char *stringCopySegment() | |
| 44 * l_int32 stringReplace() | |
| 45 * l_int32 stringLength() | |
| 46 * l_int32 stringCat() | |
| 47 * char *stringConcatNew() | |
| 48 * char *stringJoin() | |
| 49 * l_int32 stringJoinIP() | |
| 50 * char *stringReverse() | |
| 51 * char *strtokSafe() | |
| 52 * l_int32 stringSplitOnToken() | |
| 53 * | |
| 54 * Find and replace string and array procs | |
| 55 * l_int32 stringCheckForChars() | |
| 56 * char *stringRemoveChars() | |
| 57 * char *stringReplaceEachSubstr() | |
| 58 * char *stringReplaceSubstr() | |
| 59 * L_DNA *stringFindEachSubstr() | |
| 60 * l_int32 stringFindSubstr() | |
| 61 * l_uint8 *arrayReplaceEachSequence() | |
| 62 * L_DNA *arrayFindEachSequence() | |
| 63 * l_int32 arrayFindSequence() | |
| 64 * | |
| 65 * Safe realloc | |
| 66 * void *reallocNew() | |
| 67 * | |
| 68 * Read and write between file and memory | |
| 69 * l_uint8 *l_binaryRead() | |
| 70 * l_uint8 *l_binaryReadStream() | |
| 71 * l_uint8 *l_binaryReadSelect() | |
| 72 * l_uint8 *l_binaryReadSelectStream() | |
| 73 * l_int32 l_binaryWrite() | |
| 74 * l_int32 nbytesInFile() | |
| 75 * l_int32 fnbytesInFile() | |
| 76 * | |
| 77 * Copy and compare in memory | |
| 78 * l_uint8 *l_binaryCopy() | |
| 79 * l_uint8 *l_binaryCompare() | |
| 80 * | |
| 81 * File copy operations | |
| 82 * l_int32 fileCopy() | |
| 83 * l_int32 fileConcatenate() | |
| 84 * l_int32 fileAppendString() | |
| 85 * | |
| 86 * File split operations | |
| 87 * l_int32 fileSplitLinesUniform() | |
| 88 * | |
| 89 * Multi-platform functions for opening file streams | |
| 90 * FILE *fopenReadStream() | |
| 91 * FILE *fopenWriteStream() | |
| 92 * FILE *fopenReadFromMemory() | |
| 93 * | |
| 94 * Opening a Windows tmpfile for writing | |
| 95 * FILE *fopenWriteWinTempfile() | |
| 96 * | |
| 97 * Multi-platform functions that avoid C-runtime boundary crossing | |
| 98 * with Windows DLLs (use in programs only) | |
| 99 * FILE *lept_fopen() | |
| 100 * l_int32 lept_fclose() | |
| 101 * void *lept_calloc() | |
| 102 * void lept_free() | |
| 103 * | |
| 104 * Multi-platform file system operations in temp directories | |
| 105 * l_int32 lept_mkdir() | |
| 106 * l_int32 lept_rmdir() | |
| 107 * l_int32 lept_direxists() | |
| 108 * l_int32 lept_mv() | |
| 109 * l_int32 lept_rm_match() | |
| 110 * l_int32 lept_rm() | |
| 111 * l_int32 lept_rmfile() | |
| 112 * l_int32 lept_cp() | |
| 113 * | |
| 114 * Special debug/test function for calling 'system' | |
| 115 * l_int32 callSystemDebug() | |
| 116 * | |
| 117 * General file name operations | |
| 118 * l_int32 splitPathAtDirectory() | |
| 119 * l_int32 splitPathAtExtension() | |
| 120 * char *pathJoin() | |
| 121 * char *appendSubdirs() | |
| 122 * | |
| 123 * Special file name operations | |
| 124 * l_int32 convertSepCharsInPath() | |
| 125 * char *genPathname() | |
| 126 * l_int32 makeTempDirname() | |
| 127 * l_int32 modifyTrailingSlash() | |
| 128 * char *l_makeTempFilename() | |
| 129 * l_int32 extractNumberFromFilename() | |
| 130 * | |
| 131 * | |
| 132 * Notes on multi-platform development | |
| 133 * ----------------------------------- | |
| 134 * This is important: | |
| 135 * (1) With the exception of splitPathAtDirectory(), splitPathAtExtension() | |
| 136 * and genPathname(), all input pathnames must have unix separators. | |
| 137 * (2) On macOS, iOS and Windows, for read or write to "/tmp/..." | |
| 138 * the filename is rewritten to use the OS specific temp directory: | |
| 139 * /tmp ==> [Temp]/... | |
| 140 * (3) This filename rewrite, along with the conversion from unix | |
| 141 * to OS specific pathnames, happens in genPathname(). | |
| 142 * (4) Use fopenReadStream() and fopenWriteStream() to open files, | |
| 143 * because these use genPathname() to find the platform-dependent | |
| 144 * filenames. Likewise for l_binaryRead() and l_binaryWrite(). | |
| 145 * (5) For moving, copying and removing files and directories that are in | |
| 146 * subdirectories of /tmp, use the lept_*() file system shell wrappers: | |
| 147 * lept_mkdir(), lept_rmdir(), lept_mv(), lept_rm() and lept_cp(). | |
| 148 * (6) For programs use the lept_fopen(), lept_fclose(), lept_calloc() | |
| 149 * and lept_free() C library wrappers. These work properly on Windows, | |
| 150 * where the same DLL must perform complementary operations on | |
| 151 * file streams (open/close) and heap memory (malloc/free). | |
| 152 * (7) Why read and write files to temp directories? | |
| 153 * The library needs the ability to read and write ephemeral | |
| 154 * files to default places, both for generating debugging output | |
| 155 * and for supporting regression tests. Applications also need | |
| 156 * this ability for debugging. | |
| 157 * (8) Why do the pathname rewrite on macOS, iOS and Windows? | |
| 158 * The goal is to have the library, and programs using the library, | |
| 159 * run on multiple platforms without changes. The location of | |
| 160 * temporary files depends on the platform as well as the user's | |
| 161 * configuration. Temp files on some operating systems are in some | |
| 162 * directory not known a priori. To make everything work seamlessly on | |
| 163 * any OS, every time you open a file for reading or writing, | |
| 164 * use a special function such as fopenReadStream() or | |
| 165 * fopenWriteStream(); these call genPathname() to ensure that | |
| 166 * if it is a temp file, the correct path is used. To indicate | |
| 167 * that this is a temp file, the application is written with the | |
| 168 * root directory of the path in a canonical form: "/tmp". | |
| 169 * (9) Why is it that multi-platform directory functions like lept_mkdir() | |
| 170 * and lept_rmdir(), as well as associated file functions like | |
| 171 * lept_rm(), lept_mv() and lept_cp(), only work in the temp dir? | |
| 172 * These functions were designed to provide easy manipulation of | |
| 173 * temp files. The restriction to temp files is for safety -- to | |
| 174 * prevent an accidental deletion of important files. For example, | |
| 175 * lept_rmdir() first deletes all files in a specified subdirectory | |
| 176 * of temp, and then removes the directory. | |
| 177 * | |
| 178 * </pre> | |
| 179 */ | |
| 180 | |
| 181 #ifdef HAVE_CONFIG_H | |
| 182 #include <config_auto.h> | |
| 183 #endif /* HAVE_CONFIG_H */ | |
| 184 | |
| 185 #ifdef _MSC_VER | |
| 186 #include <process.h> | |
| 187 #include <direct.h> | |
| 188 #define getcwd _getcwd /* fix MSVC warning */ | |
| 189 #else | |
| 190 #include <unistd.h> | |
| 191 #endif /* _MSC_VER */ | |
| 192 | |
| 193 #ifdef _WIN32 | |
| 194 #include <windows.h> | |
| 195 #include <fcntl.h> /* _O_CREAT, ... */ | |
| 196 #include <io.h> /* _open */ | |
| 197 #include <sys/stat.h> /* _S_IREAD, _S_IWRITE */ | |
| 198 #else | |
| 199 #include <sys/stat.h> /* for stat, mkdir(2) */ | |
| 200 #include <sys/types.h> | |
| 201 #endif | |
| 202 | |
| 203 #ifdef __APPLE__ | |
| 204 #include <unistd.h> | |
| 205 #include <errno.h> | |
| 206 #endif | |
| 207 | |
| 208 #include <string.h> | |
| 209 #include <stddef.h> | |
| 210 #include "allheaders.h" | |
| 211 | |
| 212 #if defined(__APPLE__) || defined(_WIN32) | |
| 213 /* Rewrite paths starting with /tmp for macOS, iOS and Windows. */ | |
| 214 #define REWRITE_TMP | |
| 215 #endif | |
| 216 | |
| 217 /*--------------------------------------------------------------------* | |
| 218 * Safe string operations * | |
| 219 *--------------------------------------------------------------------*/ | |
| 220 /*! | |
| 221 * \brief stringNew() | |
| 222 * | |
| 223 * \param[in] src | |
| 224 * \return dest copy of %src string, or NULL on error | |
| 225 */ | |
| 226 char * | |
| 227 stringNew(const char *src) | |
| 228 { | |
| 229 l_int32 len; | |
| 230 char *dest; | |
| 231 | |
| 232 if (!src) { | |
| 233 L_WARNING("src not defined\n", __func__); | |
| 234 return NULL; | |
| 235 } | |
| 236 | |
| 237 len = strlen(src); | |
| 238 if ((dest = (char *)LEPT_CALLOC(len + 1, sizeof(char))) == NULL) | |
| 239 return (char *)ERROR_PTR("dest not made", __func__, NULL); | |
| 240 | |
| 241 stringCopy(dest, src, len); | |
| 242 return dest; | |
| 243 } | |
| 244 | |
| 245 | |
| 246 /*! | |
| 247 * \brief stringCopy() | |
| 248 * | |
| 249 * \param[in] dest existing byte buffer | |
| 250 * \param[in] src string [optional] can be null | |
| 251 * \param[in] n max number of characters to copy | |
| 252 * \return 0 if OK, 1 on error | |
| 253 * | |
| 254 * <pre> | |
| 255 * Notes: | |
| 256 * (1) Relatively safe wrapper for strncpy, that checks the input, | |
| 257 * and does not complain if %src is null or %n < 1. | |
| 258 * If %n < 1, this is a no-op. | |
| 259 * (2) %dest needs to be at least %n bytes in size. | |
| 260 * (3) We don't call strncpy() because valgrind complains about | |
| 261 * use of uninitialized values. | |
| 262 * </pre> | |
| 263 */ | |
| 264 l_ok | |
| 265 stringCopy(char *dest, | |
| 266 const char *src, | |
| 267 l_int32 n) | |
| 268 { | |
| 269 l_int32 i; | |
| 270 | |
| 271 if (!dest) | |
| 272 return ERROR_INT("dest not defined", __func__, 1); | |
| 273 if (!src || n < 1) | |
| 274 return 0; | |
| 275 | |
| 276 /* Implementation of strncpy that valgrind doesn't complain about */ | |
| 277 for (i = 0; i < n && src[i] != '\0'; i++) | |
| 278 dest[i] = src[i]; | |
| 279 for (; i < n; i++) | |
| 280 dest[i] = '\0'; | |
| 281 return 0; | |
| 282 } | |
| 283 | |
| 284 | |
| 285 /*! | |
| 286 * \brief stringCopySegment() | |
| 287 * | |
| 288 * | |
| 289 * \param[in] src string | |
| 290 * \param[in] start byte position at start of segment | |
| 291 * \param[in] nbytes number of bytes in the segment; use 0 to go to end | |
| 292 * \return copy of segment, or NULL on error | |
| 293 * | |
| 294 * <pre> | |
| 295 * Notes: | |
| 296 * (1) This is a variant of stringNew() that makes a new string | |
| 297 * from a segment of the input string. The segment is specified | |
| 298 * by the starting position and the number of bytes. | |
| 299 * (2) The start location %start must be within the string %src. | |
| 300 * (3) The copy is truncated to the end of the source string. | |
| 301 * Use %nbytes = 0 to copy to the end of %src. | |
| 302 * </pre> | |
| 303 */ | |
| 304 char * | |
| 305 stringCopySegment(const char *src, | |
| 306 l_int32 start, | |
| 307 l_int32 nbytes) | |
| 308 { | |
| 309 char *dest; | |
| 310 l_int32 len; | |
| 311 | |
| 312 if (!src) | |
| 313 return (char *)ERROR_PTR("src not defined", __func__, NULL); | |
| 314 len = strlen(src); | |
| 315 if (start < 0 || start > len - 1) | |
| 316 return (char *)ERROR_PTR("invalid start", __func__, NULL); | |
| 317 if (nbytes <= 0) /* copy to the end */ | |
| 318 nbytes = len - start; | |
| 319 if (start + nbytes > len) /* truncate to the end */ | |
| 320 nbytes = len - start; | |
| 321 if ((dest = (char *)LEPT_CALLOC(nbytes + 1, sizeof(char))) == NULL) | |
| 322 return (char *)ERROR_PTR("dest not made", __func__, NULL); | |
| 323 stringCopy(dest, src + start, nbytes); | |
| 324 return dest; | |
| 325 } | |
| 326 | |
| 327 | |
| 328 /*! | |
| 329 * \brief stringReplace() | |
| 330 * | |
| 331 * \param[out] pdest string copy | |
| 332 * \param[in] src [optional] string; can be null | |
| 333 * \return 0 if OK; 1 on error | |
| 334 * | |
| 335 * <pre> | |
| 336 * Notes: | |
| 337 * (1) Frees any existing dest string | |
| 338 * (2) Puts a copy of src string in the dest | |
| 339 * (3) If either or both strings are null, does something reasonable. | |
| 340 * </pre> | |
| 341 */ | |
| 342 l_ok | |
| 343 stringReplace(char **pdest, | |
| 344 const char *src) | |
| 345 { | |
| 346 if (!pdest) | |
| 347 return ERROR_INT("pdest not defined", __func__, 1); | |
| 348 | |
| 349 if (*pdest) | |
| 350 LEPT_FREE(*pdest); | |
| 351 | |
| 352 if (src) | |
| 353 *pdest = stringNew(src); | |
| 354 else | |
| 355 *pdest = NULL; | |
| 356 return 0; | |
| 357 } | |
| 358 | |
| 359 | |
| 360 /*! | |
| 361 * \brief stringLength() | |
| 362 * | |
| 363 * \param[in] src string can be null or NULL-terminated string | |
| 364 * \param[in] size number of bytes to check; e.g., size of src buffer | |
| 365 * \return length of src in bytes; 0 if no bytes are found; | |
| 366 * %size on error when NUL byte is not found. | |
| 367 * | |
| 368 * <pre> | |
| 369 * Notes: | |
| 370 * (1) Safe implementation of strlen that only checks %size bytes | |
| 371 * for trailing NUL. | |
| 372 * (2) Valid returned string lengths are between 0 and size - 1. | |
| 373 * If %size bytes are checked without finding a NUL byte, then | |
| 374 * an error is indicated by returning %size. | |
| 375 * </pre> | |
| 376 */ | |
| 377 l_int32 | |
| 378 stringLength(const char *src, | |
| 379 size_t size) | |
| 380 { | |
| 381 l_int32 i; | |
| 382 | |
| 383 if (!src) | |
| 384 return 0; | |
| 385 if (size < 1) | |
| 386 return ERROR_INT("size < 1; too small", __func__, 0); | |
| 387 | |
| 388 for (i = 0; i < size; i++) { | |
| 389 if (src[i] == '\0') | |
| 390 return i; | |
| 391 } | |
| 392 | |
| 393 /* Didn't find a NUL byte */ | |
| 394 L_ERROR("NUL byte not found in %zu bytes\n", __func__, size); | |
| 395 return size; | |
| 396 } | |
| 397 | |
| 398 | |
| 399 /*! | |
| 400 * \brief stringCat() | |
| 401 * | |
| 402 * \param[in] dest null-terminated byte buffer | |
| 403 * \param[in] size size of dest buffer | |
| 404 * \param[in] src string can be null or NULL-terminated string | |
| 405 * \return number of bytes added to dest; -1 on error | |
| 406 * | |
| 407 * <pre> | |
| 408 * Notes: | |
| 409 * (1) Alternative implementation of strncat, that checks the input, | |
| 410 * is easier to use (since the size of the dest buffer is specified | |
| 411 * rather than the number of bytes to copy), and does not complain | |
| 412 * if %src is null. | |
| 413 * (2) Never writes past end of dest. | |
| 414 * (3) If there is not enough room to append the src, which is an error, | |
| 415 * it does nothing. | |
| 416 * (4) N.B. The order of 2nd and 3rd args is reversed from that in | |
| 417 * strncat, as in the Windows function strcat_s(). | |
| 418 * </pre> | |
| 419 */ | |
| 420 l_int32 | |
| 421 stringCat(char *dest, | |
| 422 size_t size, | |
| 423 const char *src) | |
| 424 { | |
| 425 l_int32 i, n; | |
| 426 l_int32 lendest, lensrc; | |
| 427 | |
| 428 if (!dest) | |
| 429 return ERROR_INT("dest not defined", __func__, -1); | |
| 430 if (size < 1) | |
| 431 return ERROR_INT("size < 1; too small", __func__, -1); | |
| 432 if (!src) | |
| 433 return 0; | |
| 434 | |
| 435 lendest = stringLength(dest, size); | |
| 436 if (lendest == size) | |
| 437 return ERROR_INT("no terminating nul byte", __func__, -1); | |
| 438 lensrc = stringLength(src, size); | |
| 439 if (lensrc == 0) | |
| 440 return 0; /* nothing added to dest */ | |
| 441 n = (lendest + lensrc > size - 1) ? 0 : lensrc; | |
| 442 if (n == 0) | |
| 443 return ERROR_INT("dest too small for append", __func__, -1); | |
| 444 | |
| 445 for (i = 0; i < n; i++) | |
| 446 dest[lendest + i] = src[i]; | |
| 447 dest[lendest + n] = '\0'; | |
| 448 return n; | |
| 449 } | |
| 450 | |
| 451 | |
/*!
 * \brief   stringConcatNew()
 *
 * \param[in]    first   first string in list
 * \param[in]    ...     NULL-terminated list of strings
 * \return  result  new string concatenating the input strings, or
 *                  NULL if first == NULL or on allocation failure
 *
 * <pre>
 * Notes:
 *      (1) The last arg in the list of strings must be NULL.
 *      (2) Caller must free the returned string.
 *      (3) The argument list is walked twice: once to size the output
 *          buffer and once to copy the strings into it.
 * </pre>
 */
char *
stringConcatNew(const char  *first, ...)
{
    size_t       len;
    char        *result, *ptr;
    const char  *arg;
    va_list      args;

    if (!first) return NULL;

        /* Find the length of the output string */
    va_start(args, first);
    len = strlen(first);
    while ((arg = va_arg(args, const char *)) != NULL)
        len += strlen(arg);
    va_end(args);

        /* Without this check the copy loop below would write
         * through a null pointer if the allocation failed. */
    if ((result = (char *)LEPT_CALLOC(len + 1, sizeof(char))) == NULL)
        return (char *)ERROR_PTR("result not made", __func__, NULL);

        /* Concatenate the args; the calloc'd buffer supplies the
         * terminating NUL. */
    va_start(args, first);
    ptr = result;
    for (arg = first; arg != NULL; arg = va_arg(args, const char *)) {
        while (*arg)
            *ptr++ = *arg++;
    }
    va_end(args);
    return result;
}
| 497 | |
| 498 | |
| 499 /*! | |
| 500 * \brief stringJoin() | |
| 501 * | |
| 502 * \param[in] src1 [optional] string; can be null | |
| 503 * \param[in] src2 [optional] string; can be null | |
| 504 * \return concatenated string, or NULL on error | |
| 505 * | |
| 506 * <pre> | |
| 507 * Notes: | |
| 508 * (1) This is a safe version of strcat; it makes a new string. | |
| 509 * (2) It is not an error if either or both of the strings | |
| 510 * are empty, or if either or both of the pointers are null. | |
| 511 * </pre> | |
| 512 */ | |
| 513 char * | |
| 514 stringJoin(const char *src1, | |
| 515 const char *src2) | |
| 516 { | |
| 517 char *dest; | |
| 518 l_int32 srclen1, srclen2, destlen; | |
| 519 | |
| 520 srclen1 = (src1) ? strlen(src1) : 0; | |
| 521 srclen2 = (src2) ? strlen(src2) : 0; | |
| 522 destlen = srclen1 + srclen2 + 3; | |
| 523 | |
| 524 if ((dest = (char *)LEPT_CALLOC(destlen, sizeof(char))) == NULL) | |
| 525 return (char *)ERROR_PTR("calloc fail for dest", __func__, NULL); | |
| 526 | |
| 527 if (src1) | |
| 528 stringCat(dest, destlen, src1); | |
| 529 if (src2) | |
| 530 stringCat(dest, destlen, src2); | |
| 531 return dest; | |
| 532 } | |
| 533 | |
| 534 | |
| 535 /*! | |
| 536 * \brief stringJoinIP() | |
| 537 * | |
| 538 * \param[in,out] psrc1 address of string src1; cannot be on the stack | |
| 539 * \param[in] src2 [optional] string; can be null | |
| 540 * \return 0 if OK, 1 on error | |
| 541 * | |
| 542 * <pre> | |
| 543 * Notes: | |
| 544 * (1) This is a safe in-place version of strcat. The contents of | |
| 545 * src1 is replaced by the concatenation of src1 and src2. | |
| 546 * (2) It is not an error if either or both of the strings | |
| 547 * are empty (""), or if the pointers to the strings (*psrc1, src2) | |
| 548 * are null. | |
| 549 * (3) src1 should be initialized to null or an empty string | |
| 550 * before the first call. Use one of these: | |
| 551 * char *src1 = NULL; | |
| 552 * char *src1 = stringNew(""); | |
| 553 * Then call with: | |
| 554 * stringJoinIP(&src1, src2); | |
| 555 * (4) This can also be implemented as a macro: | |
| 556 * \code | |
| 557 * #define stringJoinIP(src1, src2) \ | |
| 558 * {tmpstr = stringJoin((src1),(src2)); \ | |
| 559 * LEPT_FREE(src1); \ | |
| 560 * (src1) = tmpstr;} | |
| 561 * \endcode | |
| 562 * (5) Another function to consider for joining many strings is | |
| 563 * stringConcatNew(). | |
| 564 * </pre> | |
| 565 */ | |
| 566 l_ok | |
| 567 stringJoinIP(char **psrc1, | |
| 568 const char *src2) | |
| 569 { | |
| 570 char *tmpstr; | |
| 571 | |
| 572 if (!psrc1) | |
| 573 return ERROR_INT("&src1 not defined", __func__, 1); | |
| 574 | |
| 575 tmpstr = stringJoin(*psrc1, src2); | |
| 576 LEPT_FREE(*psrc1); | |
| 577 *psrc1 = tmpstr; | |
| 578 return 0; | |
| 579 } | |
| 580 | |
| 581 | |
| 582 /*! | |
| 583 * \brief stringReverse() | |
| 584 * | |
| 585 * \param[in] src string | |
| 586 * \return dest newly-allocated reversed string | |
| 587 */ | |
| 588 char * | |
| 589 stringReverse(const char *src) | |
| 590 { | |
| 591 char *dest; | |
| 592 l_int32 i, len; | |
| 593 | |
| 594 if (!src) | |
| 595 return (char *)ERROR_PTR("src not defined", __func__, NULL); | |
| 596 len = strlen(src); | |
| 597 if ((dest = (char *)LEPT_CALLOC(len + 1, sizeof(char))) == NULL) | |
| 598 return (char *)ERROR_PTR("calloc fail for dest", __func__, NULL); | |
| 599 for (i = 0; i < len; i++) | |
| 600 dest[i] = src[len - 1 - i]; | |
| 601 | |
| 602 return dest; | |
| 603 } | |
| 604 | |
| 605 | |
| 606 /*! | |
| 607 * \brief strtokSafe() | |
| 608 * | |
| 609 * \param[in] cstr input string to be sequentially parsed; | |
| 610 * use NULL after the first call | |
| 611 * \param[in] seps a string of character separators | |
| 612 * \param[out] psaveptr ptr to the next char after | |
| 613 * the last encountered separator | |
| 614 * \return substr a new string that is copied from the previous | |
| 615 * saveptr up to but not including the next | |
| 616 * separator character, or NULL if end of cstr. | |
| 617 * | |
| 618 * <pre> | |
| 619 * Notes: | |
| 620 * (1) This is a thread-safe implementation of strtok. | |
| 621 * (2) It has the same interface as strtok_r. | |
| 622 * (3) It differs from strtok_r in usage in two respects: | |
| 623 * (a) the input string is not altered | |
| 624 * (b) each returned substring is newly allocated and must | |
| 625 * be freed after use. | |
| 626 * (4) Let me repeat that. This is "safe" because the input | |
| 627 * string is not altered and because each returned string | |
| 628 * is newly allocated on the heap. | |
| 629 * (5) It is here because, surprisingly, some C libraries don't | |
| 630 * include strtok_r. | |
| 631 * (6) Important usage points: | |
| 632 * ~ Input the string to be parsed on the first invocation. | |
| 633 * ~ Then input NULL after that; the value returned in saveptr | |
| 634 * is used in all subsequent calls. | |
| 635 * (7) This is only slightly slower than strtok_r. | |
| 636 * </pre> | |
| 637 */ | |
| 638 char * | |
| 639 strtokSafe(char *cstr, | |
| 640 const char *seps, | |
| 641 char **psaveptr) | |
| 642 { | |
| 643 char nextc; | |
| 644 char *start, *substr; | |
| 645 l_int32 istart, i, j, nchars; | |
| 646 | |
| 647 if (!seps) | |
| 648 return (char *)ERROR_PTR("seps not defined", __func__, NULL); | |
| 649 if (!psaveptr) | |
| 650 return (char *)ERROR_PTR("&saveptr not defined", __func__, NULL); | |
| 651 | |
| 652 if (!cstr) { | |
| 653 start = *psaveptr; | |
| 654 } else { | |
| 655 start = cstr; | |
| 656 *psaveptr = NULL; | |
| 657 } | |
| 658 if (!start) /* nothing to do */ | |
| 659 return NULL; | |
| 660 | |
| 661 /* First time, scan for the first non-sep character */ | |
| 662 istart = 0; | |
| 663 if (cstr) { | |
| 664 for (istart = 0;; istart++) { | |
| 665 if ((nextc = start[istart]) == '\0') { | |
| 666 *psaveptr = NULL; /* in case caller doesn't check ret value */ | |
| 667 return NULL; | |
| 668 } | |
| 669 if (!strchr(seps, nextc)) | |
| 670 break; | |
| 671 } | |
| 672 } | |
| 673 | |
| 674 /* Scan through, looking for a sep character; if none is | |
| 675 * found, 'i' will be at the end of the string. */ | |
| 676 for (i = istart;; i++) { | |
| 677 if ((nextc = start[i]) == '\0') | |
| 678 break; | |
| 679 if (strchr(seps, nextc)) | |
| 680 break; | |
| 681 } | |
| 682 | |
| 683 /* Save the substring */ | |
| 684 nchars = i - istart; | |
| 685 substr = (char *)LEPT_CALLOC(nchars + 1, sizeof(char)); | |
| 686 stringCopy(substr, start + istart, nchars); | |
| 687 | |
| 688 /* Look for the next non-sep character. | |
| 689 * If this is the last substring, return a null saveptr. */ | |
| 690 for (j = i;; j++) { | |
| 691 if ((nextc = start[j]) == '\0') { | |
| 692 *psaveptr = NULL; /* no more non-sep characters */ | |
| 693 break; | |
| 694 } | |
| 695 if (!strchr(seps, nextc)) { | |
| 696 *psaveptr = start + j; /* start here on next call */ | |
| 697 break; | |
| 698 } | |
| 699 } | |
| 700 | |
| 701 return substr; | |
| 702 } | |
| 703 | |
| 704 | |
| 705 /*! | |
| 706 * \brief stringSplitOnToken() | |
| 707 * | |
| 708 * \param[in] cstr input string to be split; not altered | |
| 709 * \param[in] seps a string of character separators | |
| 710 * \param[out] phead ptr to copy of the input string, up to | |
| 711 * the first separator token encountered | |
| 712 * \param[out] ptail ptr to copy of the part of the input string | |
| 713 * starting with the first non-separator character | |
| 714 * that occurs after the first separator is found | |
| 715 * \return 0 if OK, 1 on error | |
| 716 * | |
| 717 * <pre> | |
| 718 * Notes: | |
| 719 * (1) The input string is not altered; all split parts are new strings. | |
| 720 * (2) The split occurs around the first consecutive sequence of | |
| 721 * tokens encountered. | |
| 722 * (3) The head goes from the beginning of the string up to | |
| 723 * but not including the first token found. | |
| 724 * (4) The tail contains the second part of the string, starting | |
| 725 * with the first char in that part that is NOT a token. | |
| 726 * (5) If no separator token is found, 'head' contains a copy | |
| 727 * of the input string and 'tail' is null. | |
| 728 * </pre> | |
| 729 */ | |
| 730 l_ok | |
| 731 stringSplitOnToken(char *cstr, | |
| 732 const char *seps, | |
| 733 char **phead, | |
| 734 char **ptail) | |
| 735 { | |
| 736 char *saveptr; | |
| 737 | |
| 738 if (!phead) | |
| 739 return ERROR_INT("&head not defined", __func__, 1); | |
| 740 if (!ptail) | |
| 741 return ERROR_INT("&tail not defined", __func__, 1); | |
| 742 *phead = *ptail = NULL; | |
| 743 if (!cstr) | |
| 744 return ERROR_INT("cstr not defined", __func__, 1); | |
| 745 if (!seps) | |
| 746 return ERROR_INT("seps not defined", __func__, 1); | |
| 747 | |
| 748 *phead = strtokSafe(cstr, seps, &saveptr); | |
| 749 if (saveptr) | |
| 750 *ptail = stringNew(saveptr); | |
| 751 return 0; | |
| 752 } | |
| 753 | |
| 754 | |
| 755 /*--------------------------------------------------------------------* | |
| 756 * Find and replace procs * | |
| 757 *--------------------------------------------------------------------*/ | |
| 758 /*! | |
| 759 * \brief stringCheckForChars() | |
| 760 * | |
| 761 * \param[in] src input string; can be of zero length | |
| 762 * \param[in] chars string of chars to be searched for in %src | |
| 763 * \param[out] pfound 1 if any characters are found; 0 otherwise | |
| 764 * \return 0 if OK, 1 on error | |
| 765 * | |
| 766 * <pre> | |
| 767 * Notes: | |
| 768 * (1) This can be used to sanitize an operation by checking for | |
| 769 * special characters that don't belong in a string. | |
| 770 * </pre> | |
| 771 */ | |
| 772 l_ok | |
| 773 stringCheckForChars(const char *src, | |
| 774 const char *chars, | |
| 775 l_int32 *pfound) | |
| 776 { | |
| 777 char ch; | |
| 778 l_int32 i, n; | |
| 779 | |
| 780 if (!pfound) | |
| 781 return ERROR_INT("&found not defined", __func__, 1); | |
| 782 *pfound = FALSE; | |
| 783 if (!src || !chars) | |
| 784 return ERROR_INT("src and chars not both defined", __func__, 1); | |
| 785 | |
| 786 n = strlen(src); | |
| 787 for (i = 0; i < n; i++) { | |
| 788 ch = src[i]; | |
| 789 if (strchr(chars, ch)) { | |
| 790 *pfound = TRUE; | |
| 791 break; | |
| 792 } | |
| 793 } | |
| 794 return 0; | |
| 795 } | |
| 796 | |
| 797 | |
/*!
 * \brief   stringRemoveChars()
 *
 * \param[in]    src        input string; can be of zero length
 * \param[in]    remchars   string of chars to be removed from src
 * \return  dest  string with specified chars removed, or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) A null %remchars means nothing is removed: a plain copy
 *          of %src is returned.
 *      (2) The output buffer is sized for all of %src, so it is large
 *          enough whatever is removed; the zeroed allocation supplies
 *          the terminating NUL.
 * </pre>
 */
char *
stringRemoveChars(const char  *src,
                  const char  *remchars)
{
    char        *kept, *out;
    const char  *in;

    if (!src)
        return (char *)ERROR_PTR("src not defined", __func__, NULL);
    if (!remchars)
        return stringNew(src);

    kept = (char *)LEPT_CALLOC(strlen(src) + 1, sizeof(char));
    if (kept == NULL)
        return (char *)ERROR_PTR("dest not made", __func__, NULL);

        /* Keep only the bytes that do not appear in remchars */
    out = kept;
    for (in = src; *in != '\0'; in++) {
        if (strchr(remchars, *in) == NULL)
            *out++ = *in;
    }

    return kept;
}
| 829 | |
| 830 | |
| 831 /*! | |
| 832 * \brief stringReplaceEachSubstr() | |
| 833 * | |
| 834 * \param[in] src input string; can be of zero length | |
| 835 * \param[in] sub1 substring to be replaced | |
| 836 * \param[in] sub2 substring to put in; can be "" | |
| 837 * \param[out] pcount [optional] the number of times that sub1 | |
| 838 * is found in src; 0 if not found | |
| 839 * \return dest string with substring replaced, or NULL if the | |
| 840 * substring not found or on error. | |
| 841 * | |
| 842 * <pre> | |
| 843 * Notes: | |
| 844 * (1) This is a wrapper for simple string substitution that uses | |
| 845 * the more general function arrayReplaceEachSequence(). | |
| 846 * (2) This finds every non-overlapping occurrence of %sub1 in | |
| 847 * %src, and replaces it with %sub2. By "non-overlapping" | |
| 848 * we mean that after it finds each match, it removes the | |
| 849 * matching characters, replaces with the substitution string | |
| 850 * (if not empty), and continues. For example, if you replace | |
| 851 * 'aa' by 'X' in 'baaabbb', you find one match at position 1 | |
| 852 * and return 'bXabbb'. | |
| 853 * (3) To only remove each instance of sub1, use "" for sub2 | |
| 854 * (4) Returns a copy of %src if sub1 and sub2 are the same. | |
| 855 * (5) If the input %src is binary data that can have null characters, | |
| 856 * use arrayReplaceEachSequence() directly. | |
| 857 * </pre> | |
| 858 */ | |
| 859 char * | |
| 860 stringReplaceEachSubstr(const char *src, | |
| 861 const char *sub1, | |
| 862 const char *sub2, | |
| 863 l_int32 *pcount) | |
| 864 { | |
| 865 size_t datalen; | |
| 866 | |
| 867 if (pcount) *pcount = 0; | |
| 868 if (!src || !sub1 || !sub2) | |
| 869 return (char *)ERROR_PTR("src, sub1, sub2 not all defined", | |
| 870 __func__, NULL); | |
| 871 | |
| 872 if (strlen(sub2) > 0) { | |
| 873 return (char *)arrayReplaceEachSequence( | |
| 874 (const l_uint8 *)src, strlen(src), | |
| 875 (const l_uint8 *)sub1, strlen(sub1), | |
| 876 (const l_uint8 *)sub2, strlen(sub2), | |
| 877 &datalen, pcount); | |
| 878 } else { /* empty replacement string; removal only */ | |
| 879 return (char *)arrayReplaceEachSequence( | |
| 880 (const l_uint8 *)src, strlen(src), | |
| 881 (const l_uint8 *)sub1, strlen(sub1), | |
| 882 NULL, 0, &datalen, pcount); | |
| 883 } | |
| 884 } | |
| 885 | |
| 886 | |
| 887 /*! | |
| 888 * \brief stringReplaceSubstr() | |
| 889 * | |
| 890 * \param[in] src input string; can be of zero length | |
| 891 * \param[in] sub1 substring to be replaced | |
| 892 * \param[in] sub2 substring to put in; can be "" | |
| 893 * \param[in,out] ploc [optional] input start location for search; | |
| 894 * returns the loc after replacement | |
| 895 * \param[out] pfound [optional] 1 if sub1 is found; 0 otherwise | |
| 896 * \return dest string with substring replaced, or NULL on error. | |
| 897 * | |
| 898 * <pre> | |
| 899 * Notes: | |
| 900 * (1) Replaces the first instance. | |
| 901 * (2) To remove sub1 without replacement, use "" for sub2. | |
| 902 * (3) Returns a copy of %src if either no instance of %sub1 is found, | |
| 903 * or if %sub1 and %sub2 are the same. | |
| 904 * (4) If %ploc == NULL, the search will start at the beginning of %src. | |
| 905 * If %ploc != NULL, *ploc must be initialized to the byte offset | |
| 906 * within %src from which the search starts. To search the | |
| 907 * string from the beginning, set %loc = 0 and input &loc. | |
| 908 * After finding %sub1 and replacing it with %sub2, %loc will be | |
| 909 * returned as the next position after %sub2 in the output string. | |
| 910 * (5) Note that the output string also includes all the characters | |
| 911 * from the input string that occur after the single substitution. | |
| 912 * </pre> | |
| 913 */ | |
| 914 char * | |
| 915 stringReplaceSubstr(const char *src, | |
| 916 const char *sub1, | |
| 917 const char *sub2, | |
| 918 l_int32 *ploc, | |
| 919 l_int32 *pfound) | |
| 920 { | |
| 921 const char *ptr; | |
| 922 char *dest; | |
| 923 l_int32 nsrc, nsub1, nsub2, len, npre, loc; | |
| 924 | |
| 925 if (pfound) *pfound = 0; | |
| 926 if (!src || !sub1 || !sub2) | |
| 927 return (char *)ERROR_PTR("src, sub1, sub2 not all defined", | |
| 928 __func__, NULL); | |
| 929 | |
| 930 if (ploc) | |
| 931 loc = *ploc; | |
| 932 else | |
| 933 loc = 0; | |
| 934 if (!strcmp(sub1, sub2)) | |
| 935 return stringNew(src); | |
| 936 if ((ptr = strstr(src + loc, sub1)) == NULL) | |
| 937 return stringNew(src); | |
| 938 if (pfound) *pfound = 1; | |
| 939 | |
| 940 nsrc = strlen(src); | |
| 941 nsub1 = strlen(sub1); | |
| 942 nsub2 = strlen(sub2); | |
| 943 len = nsrc + nsub2 - nsub1; | |
| 944 if ((dest = (char *)LEPT_CALLOC(len + 1, sizeof(char))) == NULL) | |
| 945 return (char *)ERROR_PTR("dest not made", __func__, NULL); | |
| 946 npre = ptr - src; | |
| 947 memcpy(dest, src, npre); | |
| 948 strcpy(dest + npre, sub2); | |
| 949 strcpy(dest + npre + nsub2, ptr + nsub1); | |
| 950 if (ploc) *ploc = npre + nsub2; | |
| 951 return dest; | |
| 952 } | |
| 953 | |
| 954 | |
| 955 /*! | |
| 956 * \brief stringFindEachSubstr() | |
| 957 * | |
| 958 * \param[in] src input string; can be of zero length | |
| 959 * \param[in] sub substring to be searched for | |
| 960 * \return dna of offsets where the sequence is found, or NULL if | |
| 961 * none are found or on error | |
| 962 * | |
| 963 * <pre> | |
| 964 * Notes: | |
| 965 * (1) This finds every non-overlapping occurrence in %src of %sub. | |
| 966 * After it finds each match, it moves forward in %src by the length | |
| 967 * of %sub before continuing the search. So for example, | |
| 968 * if you search for the sequence 'aa' in the data 'baaabbb', | |
| 969 * you find one match at position 1. | |
| 970 | |
| 971 * </pre> | |
| 972 */ | |
| 973 L_DNA * | |
| 974 stringFindEachSubstr(const char *src, | |
| 975 const char *sub) | |
| 976 { | |
| 977 if (!src || !sub) | |
| 978 return (L_DNA *)ERROR_PTR("src, sub not both defined", __func__, NULL); | |
| 979 | |
| 980 return arrayFindEachSequence((const l_uint8 *)src, strlen(src), | |
| 981 (const l_uint8 *)sub, strlen(sub)); | |
| 982 } | |
| 983 | |
| 984 | |
| 985 /*! | |
| 986 * \brief stringFindSubstr() | |
| 987 * | |
| 988 * \param[in] src input string; can be of zero length | |
| 989 * \param[in] sub substring to be searched for; must not be empty | |
| 990 * \param[out] ploc [optional] location of substring in src | |
| 991 * \return 1 if found; 0 if not found or on error | |
| 992 * | |
| 993 * <pre> | |
| 994 * Notes: | |
| 995 * (1) This is a wrapper around strstr(). It finds the first | |
| 996 * instance of %sub in %src. If the substring is not found | |
| 997 * and the location is returned, it has the value -1. | |
| 998 * (2) Both %src and %sub must be defined, and %sub must have | |
| 999 * length of at least 1. | |
| 1000 * </pre> | |
| 1001 */ | |
| 1002 l_int32 | |
| 1003 stringFindSubstr(const char *src, | |
| 1004 const char *sub, | |
| 1005 l_int32 *ploc) | |
| 1006 { | |
| 1007 const char *ptr; | |
| 1008 | |
| 1009 if (ploc) *ploc = -1; | |
| 1010 if (!src || !sub) | |
| 1011 return ERROR_INT("src and sub not both defined", __func__, 0); | |
| 1012 if (strlen(sub) == 0) | |
| 1013 return ERROR_INT("substring length 0", __func__, 0); | |
| 1014 if (strlen(src) == 0) | |
| 1015 return 0; | |
| 1016 | |
| 1017 if ((ptr = strstr(src, sub)) == NULL) /* not found */ | |
| 1018 return 0; | |
| 1019 | |
| 1020 if (ploc) | |
| 1021 *ploc = ptr - src; | |
| 1022 return 1; | |
| 1023 } | |
| 1024 | |
| 1025 | |
| 1026 /*! | |
| 1027 * \brief arrayReplaceEachSequence() | |
| 1028 * | |
| 1029 * \param[in] datas source byte array | |
| 1030 * \param[in] dataslen length of source data, in bytes | |
| 1031 * \param[in] seq subarray of bytes to find in source data | |
| 1032 * \param[in] seqlen length of subarray, in bytes | |
| 1033 * \param[in] newseq replacement subarray; can be null | |
| 1034 * \param[in] newseqlen length of replacement subarray, in bytes | |
| 1035 * \param[out] pdatadlen length of dest byte array, in bytes | |
| 1036 * \param[out] pcount [optional] the number of times that sub1 | |
| 1037 * is found in src; 0 if not found | |
| 1038 * \return datad with all all subarrays replaced (or removed) | |
| 1039 * | |
| 1040 * <pre> | |
| 1041 * Notes: | |
| 1042 * (1) The byte arrays %datas, %seq and %newseq are not C strings, | |
| 1043 * because they can contain null bytes. Therefore, for each | |
| 1044 * we must give the length of the array. | |
| 1045 * (2) If %newseq == NULL, this just removes all instances of %seq. | |
| 1046 * Otherwise, it replaces every non-overlapping occurrence of | |
| 1047 * %seq in %datas with %newseq. A new array %datad and its | |
| 1048 * size are returned. See arrayFindEachSequence() for more | |
| 1049 * details on finding non-overlapping occurrences. | |
| 1050 * (3) If no instances of %seq are found, this returns a copy of %datas. | |
| 1051 * (4) The returned %datad is null terminated. | |
| 1052 * (5) Can use stringReplaceEachSubstr() if using C strings. | |
| 1053 * </pre> | |
| 1054 */ | |
| 1055 l_uint8 * | |
| 1056 arrayReplaceEachSequence(const l_uint8 *datas, | |
| 1057 size_t dataslen, | |
| 1058 const l_uint8 *seq, | |
| 1059 size_t seqlen, | |
| 1060 const l_uint8 *newseq, | |
| 1061 size_t newseqlen, | |
| 1062 size_t *pdatadlen, | |
| 1063 l_int32 *pcount) | |
| 1064 { | |
| 1065 l_uint8 *datad; | |
| 1066 size_t newsize; | |
| 1067 l_int32 n, i, j, di, si, index, incr; | |
| 1068 L_DNA *da; | |
| 1069 | |
| 1070 if (pcount) *pcount = 0; | |
| 1071 if (!datas || !seq) | |
| 1072 return (l_uint8 *)ERROR_PTR("datas & seq not both defined", | |
| 1073 __func__, NULL); | |
| 1074 if (!pdatadlen) | |
| 1075 return (l_uint8 *)ERROR_PTR("&datadlen not defined", __func__, NULL); | |
| 1076 *pdatadlen = 0; | |
| 1077 | |
| 1078 /* Identify the locations of the sequence. If there are none, | |
| 1079 * return a copy of %datas. */ | |
| 1080 if ((da = arrayFindEachSequence(datas, dataslen, seq, seqlen)) == NULL) { | |
| 1081 *pdatadlen = dataslen; | |
| 1082 return l_binaryCopy(datas, dataslen); | |
| 1083 } | |
| 1084 | |
| 1085 /* Allocate the output data; insure null termination */ | |
| 1086 n = l_dnaGetCount(da); | |
| 1087 if (pcount) *pcount = n; | |
| 1088 if (!newseq) newseqlen = 0; | |
| 1089 newsize = dataslen + n * (newseqlen - seqlen) + 4; | |
| 1090 if ((datad = (l_uint8 *)LEPT_CALLOC(newsize, sizeof(l_uint8))) == NULL) { | |
| 1091 l_dnaDestroy(&da); | |
| 1092 return (l_uint8 *)ERROR_PTR("datad not made", __func__, NULL); | |
| 1093 } | |
| 1094 | |
| 1095 /* Replace each sequence instance with a new sequence */ | |
| 1096 l_dnaGetIValue(da, 0, &si); | |
| 1097 for (i = 0, di = 0, index = 0; i < dataslen; i++) { | |
| 1098 if (i == si) { | |
| 1099 index++; | |
| 1100 if (index < n) { | |
| 1101 l_dnaGetIValue(da, index, &si); | |
| 1102 incr = L_MIN(seqlen, si - i); /* amount to remove from datas */ | |
| 1103 } else { | |
| 1104 incr = seqlen; | |
| 1105 } | |
| 1106 i += incr - 1; /* jump over the matched sequence in datas */ | |
| 1107 if (newseq) { /* add new sequence to datad */ | |
| 1108 for (j = 0; j < newseqlen; j++) | |
| 1109 datad[di++] = newseq[j]; | |
| 1110 } | |
| 1111 } else { | |
| 1112 datad[di++] = datas[i]; | |
| 1113 } | |
| 1114 } | |
| 1115 | |
| 1116 *pdatadlen = di; | |
| 1117 l_dnaDestroy(&da); | |
| 1118 return datad; | |
| 1119 } | |
| 1120 | |
| 1121 | |
| 1122 /*! | |
| 1123 * \brief arrayFindEachSequence() | |
| 1124 * | |
| 1125 * \param[in] data byte array | |
| 1126 * \param[in] datalen length of data, in bytes | |
| 1127 * \param[in] sequence subarray of bytes to find in data | |
| 1128 * \param[in] seqlen length of sequence, in bytes | |
| 1129 * \return dna of offsets where the sequence is found, or NULL if | |
| 1130 * none are found or on error | |
| 1131 * | |
| 1132 * <pre> | |
| 1133 * Notes: | |
| 1134 * (1) The byte arrays %data and %sequence are not C strings, | |
| 1135 * because they can contain null bytes. Therefore, for each | |
| 1136 * we must give the length of the array. | |
| 1137 * (2) This finds every non-overlapping occurrence in %data of %sequence. | |
| 1138 * After it finds each match, it moves forward by the length | |
| 1139 * of the sequence before continuing the search. So for example, | |
| 1140 * if you search for the sequence 'aa' in the data 'baaabbb', | |
| 1141 * you find one match at position 1. | |
| 1142 * </pre> | |
| 1143 */ | |
| 1144 L_DNA * | |
| 1145 arrayFindEachSequence(const l_uint8 *data, | |
| 1146 size_t datalen, | |
| 1147 const l_uint8 *sequence, | |
| 1148 size_t seqlen) | |
| 1149 { | |
| 1150 l_int32 start, offset, realoffset, found; | |
| 1151 L_DNA *da; | |
| 1152 | |
| 1153 if (!data || !sequence) | |
| 1154 return (L_DNA *)ERROR_PTR("data & sequence not both defined", | |
| 1155 __func__, NULL); | |
| 1156 | |
| 1157 da = l_dnaCreate(0); | |
| 1158 start = 0; | |
| 1159 while (1) { | |
| 1160 arrayFindSequence(data + start, datalen - start, sequence, seqlen, | |
| 1161 &offset, &found); | |
| 1162 if (found == FALSE) | |
| 1163 break; | |
| 1164 | |
| 1165 realoffset = start + offset; | |
| 1166 l_dnaAddNumber(da, realoffset); | |
| 1167 start = realoffset + seqlen; | |
| 1168 if (start >= datalen) | |
| 1169 break; | |
| 1170 } | |
| 1171 | |
| 1172 if (l_dnaGetCount(da) == 0) | |
| 1173 l_dnaDestroy(&da); | |
| 1174 return da; | |
| 1175 } | |
| 1176 | |
| 1177 | |
| 1178 /*! | |
| 1179 * \brief arrayFindSequence() | |
| 1180 * | |
| 1181 * \param[in] data byte array | |
| 1182 * \param[in] datalen length of data, in bytes | |
| 1183 * \param[in] sequence subarray of bytes to find in data | |
| 1184 * \param[in] seqlen length of sequence, in bytes | |
| 1185 * \param[out] poffset offset from beginning of | |
| 1186 * data where the sequence begins | |
| 1187 * \param[out] pfound 1 if sequence is found; 0 otherwise | |
| 1188 * \return 0 if OK, 1 on error | |
| 1189 * | |
| 1190 * <pre> | |
| 1191 * Notes: | |
| 1192 * (1) The byte arrays 'data' and 'sequence' are in general not C strings, | |
| 1193 * because they can contain null bytes. Therefore, for each | |
| 1194 * we must give the length of the array. | |
| 1195 * (2) This searches for the first occurrence in %data of %sequence, | |
| 1196 * which consists of %seqlen bytes. The parameter %seqlen | |
| 1197 * must not exceed the actual length of the %sequence byte array. | |
| 1198 * (3) If either byte array is a C string, cast the array to | |
| 1199 * (const l_uint8 *) and use strlen() on the string for its length. | |
| 1200 * (4) If the sequence is not found, the offset will be 0, so you | |
| 1201 * must check %found. | |
| 1202 * </pre> | |
| 1203 */ | |
| 1204 l_ok | |
| 1205 arrayFindSequence(const l_uint8 *data, | |
| 1206 size_t datalen, | |
| 1207 const l_uint8 *sequence, | |
| 1208 size_t seqlen, | |
| 1209 l_int32 *poffset, | |
| 1210 l_int32 *pfound) | |
| 1211 { | |
| 1212 l_int32 i, j, found, lastpos; | |
| 1213 | |
| 1214 if (poffset) *poffset = 0; | |
| 1215 if (pfound) *pfound = FALSE; | |
| 1216 if (!data || !sequence) | |
| 1217 return ERROR_INT("data & sequence not both defined", __func__, 1); | |
| 1218 if (!poffset || !pfound) | |
| 1219 return ERROR_INT("&offset and &found not defined", __func__, 1); | |
| 1220 | |
| 1221 lastpos = datalen - seqlen + 1; | |
| 1222 found = FALSE; | |
| 1223 for (i = 0; i < lastpos; i++) { | |
| 1224 for (j = 0; j < seqlen; j++) { | |
| 1225 if (data[i + j] != sequence[j]) | |
| 1226 break; | |
| 1227 if (j == seqlen - 1) | |
| 1228 found = TRUE; | |
| 1229 } | |
| 1230 if (found == TRUE) | |
| 1231 break; | |
| 1232 } | |
| 1233 | |
| 1234 if (found == TRUE) { | |
| 1235 *poffset = i; | |
| 1236 *pfound = TRUE; | |
| 1237 } | |
| 1238 return 0; | |
| 1239 } | |
| 1240 | |
| 1241 | |
| 1242 /*--------------------------------------------------------------------* | |
| 1243 * Safe realloc * | |
| 1244 *--------------------------------------------------------------------*/ | |
/*!
 * \brief   reallocNew()
 *
 * \param[in,out]  pindata    nulls indata before reallocing
 * \param[in]      oldsize    size of input data to be copied, in bytes
 * \param[in]      newsize    size of buffer to be reallocated in bytes
 * \return  ptr to new data, or NULL on error
 *
 *  Action:  N.B. (3) and (4)!
 *      (1) Allocates memory, initialized to 0
 *      (2) Copies as much of the input data as possible
 *          to the new block, truncating the copy if necessary
 *      (3) Frees the input data
 *      (4) Zeroes the input data ptr
 *
 * <pre>
 * Notes:
 *      (1) If newsize == 0, frees input data and nulls ptr; NULL is
 *          returned, which in this case is not an error.
 *      (2) If input data is null, only callocs new memory
 *      (3) This differs from realloc in that it always allocates
 *          new memory (if newsize > 0) and initializes it to 0,
 *          it requires the amount of old data to be copied,
 *          and it takes the address of the input ptr and
 *          nulls the handle.
 *      (4) If the new allocation fails, the input data is neither
 *          freed nor nulled.
 * </pre>
 */
void *
reallocNew(void   **pindata,
           size_t   oldsize,
           size_t   newsize)
{
void  *olddata;
void  *newdata;

    if (!pindata)
        return ERROR_PTR("input data not defined", __func__, NULL);
    olddata = *pindata;

        /* Nonstandard usage: a request for 0 bytes just releases
         * the input */
    if (newsize == 0) {
        if (olddata) {
            LEPT_FREE(olddata);
            *pindata = NULL;
        }
        return NULL;
    }

        /* A new zeroed block is needed whether or not there is
         * any input data */
    newdata = (void *)LEPT_CALLOC(1, newsize);
    if (!newdata)
        return ERROR_PTR("newdata not made", __func__, NULL);

        /* Standard usage: carry over what fits, then release the
         * old block and null the caller's handle */
    if (olddata) {
        memcpy(newdata, olddata, L_MIN(oldsize, newsize));
        LEPT_FREE(olddata);
        *pindata = NULL;
    }
    return newdata;
}
| 1307 | |
| 1308 | |
| 1309 /*--------------------------------------------------------------------* | |
| 1310 * Read and write between file and memory * | |
| 1311 *--------------------------------------------------------------------*/ | |
| 1312 /*! | |
| 1313 * \brief l_binaryRead() | |
| 1314 * | |
| 1315 * \param[in] filename | |
| 1316 * \param[out] pnbytes number of bytes read | |
| 1317 * \return data, or NULL on error | |
| 1318 */ | |
| 1319 l_uint8 * | |
| 1320 l_binaryRead(const char *filename, | |
| 1321 size_t *pnbytes) | |
| 1322 { | |
| 1323 l_uint8 *data; | |
| 1324 FILE *fp; | |
| 1325 | |
| 1326 if (!pnbytes) | |
| 1327 return (l_uint8 *)ERROR_PTR("pnbytes not defined", __func__, NULL); | |
| 1328 *pnbytes = 0; | |
| 1329 if (!filename) | |
| 1330 return (l_uint8 *)ERROR_PTR("filename not defined", __func__, NULL); | |
| 1331 | |
| 1332 if ((fp = fopenReadStream(filename)) == NULL) | |
| 1333 return (l_uint8 *)ERROR_PTR_1("file stream not opened", | |
| 1334 filename, __func__, NULL); | |
| 1335 data = l_binaryReadStream(fp, pnbytes); | |
| 1336 fclose(fp); | |
| 1337 return data; | |
| 1338 } | |
| 1339 | |
| 1340 | |
| 1341 /*! | |
| 1342 * \brief l_binaryReadStream() | |
| 1343 * | |
| 1344 * \param[in] fp file stream opened to read; can be stdin | |
| 1345 * \param[out] pnbytes number of bytes read | |
| 1346 * \return null-terminated array, or NULL on error; reading 0 bytes | |
| 1347 * is not an error | |
| 1348 * | |
| 1349 * <pre> | |
| 1350 * Notes: | |
| 1351 * (1) The returned array is terminated with a null byte so that it can | |
| 1352 * be used to read ascii data from a file into a proper C string. | |
| 1353 * (2) This can be used to capture data that is piped in via stdin, | |
| 1354 * because it does not require seeking within the file. | |
| 1355 * (3) For example, you can read an image from stdin into memory | |
| 1356 * using shell redirection, with one of these shell commands: | |
| 1357 * \code | |
| 1358 * cat <imagefile> | readprog | |
| 1359 * readprog < <imagefile> | |
| 1360 * \endcode | |
| 1361 * where readprog is: | |
| 1362 * \code | |
| 1363 * l_uint8 *data = l_binaryReadStream(stdin, &nbytes); | |
| 1364 * Pix *pix = pixReadMem(data, nbytes); | |
| 1365 * \endcode | |
| 1366 * </pre> | |
| 1367 */ | |
| 1368 l_uint8 * | |
| 1369 l_binaryReadStream(FILE *fp, | |
| 1370 size_t *pnbytes) | |
| 1371 { | |
| 1372 l_uint8 *data; | |
| 1373 l_int32 seekable, navail, nadd, nread; | |
| 1374 L_BBUFFER *bb; | |
| 1375 | |
| 1376 if (!pnbytes) | |
| 1377 return (l_uint8 *)ERROR_PTR("&nbytes not defined", __func__, NULL); | |
| 1378 *pnbytes = 0; | |
| 1379 if (!fp) | |
| 1380 return (l_uint8 *)ERROR_PTR("fp not defined", __func__, NULL); | |
| 1381 | |
| 1382 /* Test if the stream is seekable, by attempting to seek to | |
| 1383 * the start of data. This is a no-op. If it is seekable, use | |
| 1384 * l_binaryReadSelectStream() to determine the size of the | |
| 1385 * data to be read in advance. */ | |
| 1386 seekable = (ftell(fp) == 0) ? 1 : 0; | |
| 1387 if (seekable) | |
| 1388 return l_binaryReadSelectStream(fp, 0, 0, pnbytes); | |
| 1389 | |
| 1390 /* If it is not seekable, use the bbuffer to realloc memory | |
| 1391 * as needed during reading. */ | |
| 1392 bb = bbufferCreate(NULL, 4096); | |
| 1393 while (1) { | |
| 1394 navail = bb->nalloc - bb->n; | |
| 1395 if (navail < 4096) { | |
| 1396 nadd = L_MAX(bb->nalloc, 4096); | |
| 1397 bbufferExtendArray(bb, nadd); | |
| 1398 } | |
| 1399 nread = fread((void *)(bb->array + bb->n), 1, 4096, fp); | |
| 1400 bb->n += nread; | |
| 1401 if (nread != 4096) break; | |
| 1402 } | |
| 1403 | |
| 1404 /* Copy the data to a new array sized for the data, because | |
| 1405 * the bbuffer array can be nearly twice the size we need. */ | |
| 1406 if ((data = (l_uint8 *)LEPT_CALLOC(bb->n + 1, sizeof(l_uint8))) != NULL) { | |
| 1407 memcpy(data, bb->array, bb->n); | |
| 1408 *pnbytes = bb->n; | |
| 1409 } else { | |
| 1410 L_ERROR("calloc fail for data\n", __func__); | |
| 1411 } | |
| 1412 | |
| 1413 bbufferDestroy(&bb); | |
| 1414 return data; | |
| 1415 } | |
| 1416 | |
| 1417 | |
| 1418 /*! | |
| 1419 * \brief l_binaryReadSelect() | |
| 1420 * | |
| 1421 * \param[in] filename | |
| 1422 * \param[in] start first byte to read | |
| 1423 * \param[in] nbytes number of bytes to read; use 0 to read to end of file | |
| 1424 * \param[out] pnread number of bytes actually read | |
| 1425 * \return data, or NULL on error | |
| 1426 * | |
| 1427 * <pre> | |
| 1428 * Notes: | |
| 1429 * (1) The returned array is terminated with a null byte so that it can | |
| 1430 * be used to read ascii data from a file into a proper C string. | |
| 1431 * </pre> | |
| 1432 */ | |
| 1433 l_uint8 * | |
| 1434 l_binaryReadSelect(const char *filename, | |
| 1435 size_t start, | |
| 1436 size_t nbytes, | |
| 1437 size_t *pnread) | |
| 1438 { | |
| 1439 l_uint8 *data; | |
| 1440 FILE *fp; | |
| 1441 | |
| 1442 if (!pnread) | |
| 1443 return (l_uint8 *)ERROR_PTR("pnread not defined", __func__, NULL); | |
| 1444 *pnread = 0; | |
| 1445 if (!filename) | |
| 1446 return (l_uint8 *)ERROR_PTR("filename not defined", __func__, NULL); | |
| 1447 | |
| 1448 if ((fp = fopenReadStream(filename)) == NULL) | |
| 1449 return (l_uint8 *)ERROR_PTR_1("file stream not opened", | |
| 1450 filename, __func__, NULL); | |
| 1451 data = l_binaryReadSelectStream(fp, start, nbytes, pnread); | |
| 1452 fclose(fp); | |
| 1453 return data; | |
| 1454 } | |
| 1455 | |
| 1456 | |
| 1457 /*! | |
| 1458 * \brief l_binaryReadSelectStream() | |
| 1459 * | |
| 1460 * \param[in] fp file stream | |
| 1461 * \param[in] start first byte to read | |
| 1462 * \param[in] nbytes number of bytes to read; use 0 to read to end of file | |
| 1463 * \param[out] pnread number of bytes actually read | |
| 1464 * \return null-terminated array, or NULL on error; reading 0 bytes | |
| 1465 * is not an error | |
| 1466 * | |
| 1467 * <pre> | |
| 1468 * Notes: | |
| 1469 * (1) The returned array is terminated with a null byte so that it can | |
| 1470 * be used to read ascii data from a file into a proper C string. | |
| 1471 * If the file to be read is empty and %start == 0, an array | |
| 1472 * with a single null byte is returned. | |
| 1473 * (2) Side effect: the stream pointer is re-positioned to the | |
| 1474 * beginning of the file. | |
| 1475 * </pre> | |
| 1476 */ | |
| 1477 l_uint8 * | |
| 1478 l_binaryReadSelectStream(FILE *fp, | |
| 1479 size_t start, | |
| 1480 size_t nbytes, | |
| 1481 size_t *pnread) | |
| 1482 { | |
| 1483 l_uint8 *data; | |
| 1484 size_t bytesleft, bytestoread, nread, filebytes; | |
| 1485 | |
| 1486 if (!pnread) | |
| 1487 return (l_uint8 *)ERROR_PTR("&nread not defined", __func__, NULL); | |
| 1488 *pnread = 0; | |
| 1489 if (!fp) | |
| 1490 return (l_uint8 *)ERROR_PTR("stream not defined", __func__, NULL); | |
| 1491 | |
| 1492 /* Verify and adjust the parameters if necessary */ | |
| 1493 fseek(fp, 0, SEEK_END); /* EOF */ | |
| 1494 filebytes = ftell(fp); | |
| 1495 fseek(fp, 0, SEEK_SET); | |
| 1496 if (start > filebytes) { | |
| 1497 L_ERROR("start = %zu but filebytes = %zu\n", __func__, | |
| 1498 start, filebytes); | |
| 1499 return NULL; | |
| 1500 } | |
| 1501 if (filebytes == 0) /* start == 0; nothing to read; return null byte */ | |
| 1502 return (l_uint8 *)LEPT_CALLOC(1, 1); | |
| 1503 bytesleft = filebytes - start; /* greater than 0 */ | |
| 1504 if (nbytes == 0) nbytes = bytesleft; | |
| 1505 bytestoread = (bytesleft >= nbytes) ? nbytes : bytesleft; | |
| 1506 | |
| 1507 /* Read the data */ | |
| 1508 if ((data = (l_uint8 *)LEPT_CALLOC(1, bytestoread + 1)) == NULL) | |
| 1509 return (l_uint8 *)ERROR_PTR("calloc fail for data", __func__, NULL); | |
| 1510 fseek(fp, start, SEEK_SET); | |
| 1511 nread = fread(data, 1, bytestoread, fp); | |
| 1512 if (nbytes != nread) | |
| 1513 L_INFO("%zu bytes requested; %zu bytes read\n", __func__, | |
| 1514 nbytes, nread); | |
| 1515 *pnread = nread; | |
| 1516 fseek(fp, 0, SEEK_SET); | |
| 1517 return data; | |
| 1518 } | |
| 1519 | |
| 1520 | |
| 1521 /*! | |
| 1522 * \brief l_binaryWrite() | |
| 1523 * | |
| 1524 * \param[in] filename output file | |
| 1525 * \param[in] operation "w" for write; "a" for append | |
| 1526 * \param[in] data binary data to be written | |
| 1527 * \param[in] nbytes size of data array | |
| 1528 * \return 0 if OK; 1 on error | |
| 1529 */ | |
| 1530 l_ok | |
| 1531 l_binaryWrite(const char *filename, | |
| 1532 const char *operation, | |
| 1533 const void *data, | |
| 1534 size_t nbytes) | |
| 1535 { | |
| 1536 char actualOperation[20]; | |
| 1537 FILE *fp; | |
| 1538 | |
| 1539 if (!filename) | |
| 1540 return ERROR_INT("filename not defined", __func__, 1); | |
| 1541 if (!operation) | |
| 1542 return ERROR_INT("operation not defined", __func__, 1); | |
| 1543 if (!data) | |
| 1544 return ERROR_INT("data not defined", __func__, 1); | |
| 1545 if (nbytes <= 0) | |
| 1546 return ERROR_INT("nbytes must be > 0", __func__, 1); | |
| 1547 | |
| 1548 if (strcmp(operation, "w") && strcmp(operation, "a")) | |
| 1549 return ERROR_INT("operation not one of {'w','a'}", __func__, 1); | |
| 1550 | |
| 1551 /* The 'b' flag to fopen() is ignored for all POSIX | |
| 1552 * conforming systems. However, Windows needs the 'b' flag. */ | |
| 1553 stringCopy(actualOperation, operation, 2); | |
| 1554 stringCat(actualOperation, 20, "b"); | |
| 1555 | |
| 1556 if ((fp = fopenWriteStream(filename, actualOperation)) == NULL) | |
| 1557 return ERROR_INT_1("stream not opened", filename, __func__, 1); | |
| 1558 fwrite(data, 1, nbytes, fp); | |
| 1559 fclose(fp); | |
| 1560 return 0; | |
| 1561 } | |
| 1562 | |
| 1563 | |
/*!
 * \brief   nbytesInFile()
 *
 * \param[in]    filename
 * \return  nbytes in file; 0 on error
 *
 * <pre>
 * Notes:
 *      (1) This opens the file, gets the size with fnbytesInFile(),
 *          and closes it again.
 * </pre>
 */
size_t
nbytesInFile(const char  *filename)
{
size_t  size;
FILE   *fp;

    if (!filename)
        return ERROR_INT("filename not defined", __func__, 0);

    fp = fopenReadStream(filename);
    if (!fp)
        return ERROR_INT_1("stream not opened", filename, __func__, 0);

    size = fnbytesInFile(fp);
    fclose(fp);
    return size;
}
| 1584 | |
| 1585 | |
| 1586 /*! | |
| 1587 * \brief fnbytesInFile() | |
| 1588 * | |
| 1589 * \param[in] fp file stream | |
| 1590 * \return nbytes in file; 0 on error | |
| 1591 */ | |
| 1592 size_t | |
| 1593 fnbytesInFile(FILE *fp) | |
| 1594 { | |
| 1595 l_int64 pos, nbytes; | |
| 1596 | |
| 1597 if (!fp) | |
| 1598 return ERROR_INT("stream not open", __func__, 0); | |
| 1599 | |
| 1600 pos = ftell(fp); /* initial position */ | |
| 1601 if (pos < 0) | |
| 1602 return ERROR_INT("seek position must be > 0", __func__, 0); | |
| 1603 fseek(fp, 0, SEEK_END); /* EOF */ | |
| 1604 nbytes = ftell(fp); | |
| 1605 if (nbytes < 0) | |
| 1606 return ERROR_INT("nbytes is < 0", __func__, 0); | |
| 1607 fseek(fp, pos, SEEK_SET); /* back to initial position */ | |
| 1608 return nbytes; | |
| 1609 } | |
| 1610 | |
| 1611 | |
| 1612 /*--------------------------------------------------------------------* | |
| 1613 * Copy and compare in memory * | |
| 1614 *--------------------------------------------------------------------*/ | |
| 1615 /*! | |
| 1616 * \brief l_binaryCopy() | |
| 1617 * | |
| 1618 * \param[in] datas | |
| 1619 * \param[in] size of data array | |
| 1620 * \return datad on heap, or NULL on error | |
| 1621 * | |
| 1622 * <pre> | |
| 1623 * Notes: | |
| 1624 * (1) We add 4 bytes to the zeroed output because in some cases | |
| 1625 * (e.g., string handling) it is important to have the data | |
| 1626 * be null terminated. This guarantees that after the memcpy, | |
| 1627 * the result is automatically null terminated. | |
| 1628 * </pre> | |
| 1629 */ | |
| 1630 l_uint8 * | |
| 1631 l_binaryCopy(const l_uint8 *datas, | |
| 1632 size_t size) | |
| 1633 { | |
| 1634 l_uint8 *datad; | |
| 1635 | |
| 1636 if (!datas) | |
| 1637 return (l_uint8 *)ERROR_PTR("datas not defined", __func__, NULL); | |
| 1638 | |
| 1639 if ((datad = (l_uint8 *)LEPT_CALLOC(size + 4, sizeof(l_uint8))) == NULL) | |
| 1640 return (l_uint8 *)ERROR_PTR("datad not made", __func__, NULL); | |
| 1641 memcpy(datad, datas, size); | |
| 1642 return datad; | |
| 1643 } | |
| 1644 | |
| 1645 | |
| 1646 /*! | |
| 1647 * \brief l_binaryCompare() | |
| 1648 * | |
| 1649 * \param[in] data1 | |
| 1650 * \param[in] size1 of data1 | |
| 1651 * \param[in] data2 | |
| 1652 * \param[in] size2 of data1 | |
| 1653 * \param[out] psame (1 if the same, 0 if different) | |
| 1654 * \return 0 if OK, 1 on error | |
| 1655 * | |
| 1656 * <pre> | |
| 1657 * Notes: | |
| 1658 * (1) This can also be used to compare C strings str1 and str2. | |
| 1659 * If the string lengths are not known, use strlen(): | |
| 1660 * l_binaryCompare((l_uint8 *)str1, strlen(str1), | |
| 1661 (l_uint8 *)str2, strlen(str2)); | |
| 1662 * </pre> | |
| 1663 */ | |
| 1664 l_ok | |
| 1665 l_binaryCompare(const l_uint8 *data1, | |
| 1666 size_t size1, | |
| 1667 const l_uint8 *data2, | |
| 1668 size_t size2, | |
| 1669 l_int32 *psame) | |
| 1670 { | |
| 1671 l_int32 i; | |
| 1672 | |
| 1673 if (!psame) | |
| 1674 return ERROR_INT("&same not defined", __func__, 1); | |
| 1675 *psame = FALSE; | |
| 1676 if (!data1 || !data2) | |
| 1677 return ERROR_INT("data1 and data2 not both defined", __func__, 1); | |
| 1678 if (size1 != size2) return 0; | |
| 1679 for (i = 0; i < size1; i++) { | |
| 1680 if (data1[i] != data2[i]) | |
| 1681 return 0; | |
| 1682 } | |
| 1683 *psame = TRUE; | |
| 1684 return 0; | |
| 1685 } | |
| 1686 | |
| 1687 | |
| 1688 /*--------------------------------------------------------------------* | |
| 1689 * File copy operations * | |
| 1690 *--------------------------------------------------------------------*/ | |
| 1691 /*! | |
| 1692 * \brief fileCopy() | |
| 1693 * | |
| 1694 * \param[in] srcfile copy from this file | |
| 1695 * \param[in] newfile copy to this file | |
| 1696 * \return 0 if OK, 1 on error | |
| 1697 */ | |
| 1698 l_ok | |
| 1699 fileCopy(const char *srcfile, | |
| 1700 const char *newfile) | |
| 1701 { | |
| 1702 l_int32 ret; | |
| 1703 size_t nbytes; | |
| 1704 l_uint8 *data; | |
| 1705 | |
| 1706 if (!srcfile) | |
| 1707 return ERROR_INT("srcfile not defined", __func__, 1); | |
| 1708 if (!newfile) | |
| 1709 return ERROR_INT("newfile not defined", __func__, 1); | |
| 1710 | |
| 1711 if ((data = l_binaryRead(srcfile, &nbytes)) == NULL) | |
| 1712 return ERROR_INT("data not returned", __func__, 1); | |
| 1713 ret = l_binaryWrite(newfile, "w", data, nbytes); | |
| 1714 LEPT_FREE(data); | |
| 1715 return ret; | |
| 1716 } | |
| 1717 | |
| 1718 | |
| 1719 /*! | |
| 1720 * \brief fileConcatenate() | |
| 1721 * | |
| 1722 * \param[in] srcfile append data from this file | |
| 1723 * \param[in] destfile add data to this file | |
| 1724 * \return 0 if OK, 1 on error | |
| 1725 */ | |
| 1726 l_ok | |
| 1727 fileConcatenate(const char *srcfile, | |
| 1728 const char *destfile) | |
| 1729 { | |
| 1730 size_t nbytes; | |
| 1731 l_uint8 *data; | |
| 1732 | |
| 1733 if (!srcfile) | |
| 1734 return ERROR_INT("srcfile not defined", __func__, 1); | |
| 1735 if (!destfile) | |
| 1736 return ERROR_INT("destfile not defined", __func__, 1); | |
| 1737 | |
| 1738 data = l_binaryRead(srcfile, &nbytes); | |
| 1739 l_binaryWrite(destfile, "a", data, nbytes); | |
| 1740 LEPT_FREE(data); | |
| 1741 return 0; | |
| 1742 } | |
| 1743 | |
| 1744 | |
| 1745 /*! | |
| 1746 * \brief fileAppendString() | |
| 1747 * | |
| 1748 * \param[in] filename | |
| 1749 * \param[in] str string to append to file | |
| 1750 * \return 0 if OK, 1 on error | |
| 1751 */ | |
| 1752 l_ok | |
| 1753 fileAppendString(const char *filename, | |
| 1754 const char *str) | |
| 1755 { | |
| 1756 FILE *fp; | |
| 1757 | |
| 1758 if (!filename) | |
| 1759 return ERROR_INT("filename not defined", __func__, 1); | |
| 1760 if (!str) | |
| 1761 return ERROR_INT("str not defined", __func__, 1); | |
| 1762 | |
| 1763 if ((fp = fopenWriteStream(filename, "a")) == NULL) | |
| 1764 return ERROR_INT_1("stream not opened", filename, __func__, 1); | |
| 1765 fprintf(fp, "%s", str); | |
| 1766 fclose(fp); | |
| 1767 return 0; | |
| 1768 } | |
| 1769 | |
| 1770 | |
| 1771 /*--------------------------------------------------------------------* | |
| 1772 * File split operations * | |
| 1773 *--------------------------------------------------------------------*/ | |
| 1774 /*! | |
| 1775 * \brief fileSplitLinesUniform() | |
| 1776 * | |
| 1777 * \param[in] filename input file | |
| 1778 * \param[in] n number of output files (>= 1) | |
| 1779 * \param[in] save_empty 1 to save empty lines; 0 to remove them | |
| 1780 * \param[in] rootpath root pathname of output files | |
| 1781 * \param[in] ext output extension, including the '.'; can be NULL | |
| 1782 * \return 0 if OK, 1 on error | |
| 1783 * | |
| 1784 * <pre> | |
| 1785 * Notes: | |
| 1786 * (1) This splits an input text file into %n files with roughly | |
| 1787 * equal numbers of text lines in each file. | |
| 1788 * (2) if %save_empty == 1, empty lines are included, and concatention | |
| 1789 * of the text in the split files will be identical to the original. | |
| 1790 * (3) The output filenames are in the form: | |
| 1791 * <rootpath>_N.<ext>, N = 1, ... n | |
| 1792 * (4) This handles the temp directory pathname conversion where needed: | |
| 1793 * /tmp ==> [OS specific temp directory] | |
| 1794 * (5) Files can also be sharded into sets of lines by the program 'split': | |
| 1795 * split -n l/<n> <filename> | |
| 1796 * Using 'split', the resulting files have approximately equal | |
| 1797 * numbers of bytes, rather than equal numbers of lines. | |
| 1798 * </pre> | |
| 1799 */ | |
| 1800 l_ok | |
| 1801 fileSplitLinesUniform(const char *filename, | |
| 1802 l_int32 n, | |
| 1803 l_int32 save_empty, | |
| 1804 const char *rootpath, | |
| 1805 const char *ext) | |
| 1806 { | |
| 1807 l_int32 i, totlines, nlines, index; | |
| 1808 size_t nbytes; | |
| 1809 l_uint8 *data; | |
| 1810 char *str; | |
| 1811 char outname[512]; | |
| 1812 NUMA *na; | |
| 1813 SARRAY *sa; | |
| 1814 | |
| 1815 if (!filename) | |
| 1816 return ERROR_INT("filename not defined", __func__, 1); | |
| 1817 if (!rootpath) | |
| 1818 return ERROR_INT("rootpath not defined", __func__, 1); | |
| 1819 if (n <= 0) | |
| 1820 return ERROR_INT("n must be > 0", __func__, 1); | |
| 1821 if (save_empty != 0 && save_empty != 1) | |
| 1822 return ERROR_INT("save_empty not 0 or 1", __func__, 1); | |
| 1823 | |
| 1824 /* Make sarray of lines; the newlines are stripped off */ | |
| 1825 if ((data = l_binaryRead(filename, &nbytes)) == NULL) | |
| 1826 return ERROR_INT("data not read", __func__, 1); | |
| 1827 sa = sarrayCreateLinesFromString((const char *)data, save_empty); | |
| 1828 LEPT_FREE(data); | |
| 1829 if (!sa) | |
| 1830 return ERROR_INT("sa not made", __func__, 1); | |
| 1831 totlines = sarrayGetCount(sa); | |
| 1832 if (n > totlines) { | |
| 1833 sarrayDestroy(&sa); | |
| 1834 L_ERROR("num files = %d > num lines = %d\n", __func__, n, totlines); | |
| 1835 return 1; | |
| 1836 } | |
| 1837 | |
| 1838 /* Write n sets of lines to n files, adding the newlines back */ | |
| 1839 na = numaGetUniformBinSizes(totlines, n); | |
| 1840 index = 0; | |
| 1841 for (i = 0; i < n; i++) { | |
| 1842 if (ext == NULL) | |
| 1843 snprintf(outname, sizeof(outname), "%s_%d", rootpath, i); | |
| 1844 else | |
| 1845 snprintf(outname, sizeof(outname), "%s_%d%s", rootpath, i, ext); | |
| 1846 numaGetIValue(na, i, &nlines); | |
| 1847 str = sarrayToStringRange(sa, index, nlines, 1); /* add newlines */ | |
| 1848 l_binaryWrite(outname, "w", str, strlen(str)); | |
| 1849 LEPT_FREE(str); | |
| 1850 index += nlines; | |
| 1851 } | |
| 1852 numaDestroy(&na); | |
| 1853 sarrayDestroy(&sa); | |
| 1854 return 0; | |
| 1855 } | |
| 1856 | |
| 1857 | |
| 1858 /*--------------------------------------------------------------------* | |
| 1859 * Multi-platform functions for opening file streams * | |
| 1860 *--------------------------------------------------------------------*/ | |
/*!
 * \brief   fopenReadStream()
 *
 * \param[in]    filename
 * \return  stream, or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) This should be used whenever you want to run fopen() to
 *          read from a stream.  Never call fopen() directly.
 *      (2) This handles the temp directory pathname conversion where needed:
 *              /tmp  ==>  [OS specific temp directory]
 *      (3) The stream is always opened with mode "rb".
 *      (4) If the file cannot be opened at the generated pathname, the
 *          directory is stripped and the tail is tried relative to the
 *          current directory.
 * </pre>
 */
FILE *
fopenReadStream(const char  *filename)
{
    char  *fname;
    char  *tail = NULL;
    FILE  *fp;

    if (!filename)
        return (FILE *)ERROR_PTR("filename not defined", __func__, NULL);

        /* Try input filename; never hand a NULL pathname to fopen() */
    if ((fname = genPathname(filename, NULL)) != NULL) {
        fp = fopen(fname, "rb");
        LEPT_FREE(fname);
        if (fp) return fp;
    }

        /* Else, strip directory and try locally */
    splitPathAtDirectory(filename, NULL, &tail);
    if (!tail)
        return (FILE*)ERROR_PTR_1("tail not found", filename, __func__, NULL);
    fp = fopen(tail, "rb");
    if (!fp)
        L_ERROR("failed to open locally with tail %s for filename %s\n",
                __func__, tail, filename);
    LEPT_FREE(tail);
    return fp;
}
| 1901 | |
| 1902 | |
/*!
 * \brief   fopenWriteStream()
 *
 * \param[in]    filename
 * \param[in]    modestring    passed unchanged to fopen()
 * \return  stream, or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) This should be used whenever you want to run fopen() to
 *          write or append to a stream.  Never call fopen() directly.
 *      (2) This handles the temp directory pathname conversion where needed:
 *              /tmp  ==>  [OS specific temp directory]
 * </pre>
 */
FILE *
fopenWriteStream(const char  *filename,
                 const char  *modestring)
{
    char  *fname;
    FILE  *fp;

    if (!filename)
        return (FILE *)ERROR_PTR("filename not defined", __func__, NULL);
    if (!modestring)
        return (FILE *)ERROR_PTR("modestring not defined", __func__, NULL);

        /* Never hand a NULL pathname to fopen() */
    if ((fname = genPathname(filename, NULL)) == NULL)
        return (FILE *)ERROR_PTR_1("pathname not made", filename,
                                   __func__, NULL);
    fp = fopen(fname, modestring);
    if (!fp)
        fp = (FILE *)ERROR_PTR_1("stream not opened", fname, __func__, NULL);
    LEPT_FREE(fname);
    return fp;
}
| 1935 | |
| 1936 | |
| 1937 /*! | |
| 1938 * \brief fopenReadFromMemory() | |
| 1939 * | |
| 1940 * \param[in] data, size | |
| 1941 * \return file stream, or NULL on error | |
| 1942 * | |
| 1943 * <pre> | |
| 1944 * Notes: | |
| 1945 * (1) Work-around if fmemopen() not available. | |
| 1946 * (2) Windows tmpfile() writes into the root C:\ directory, which | |
| 1947 * requires admin privileges. This also works around that. | |
| 1948 * </pre> | |
| 1949 */ | |
| 1950 FILE * | |
| 1951 fopenReadFromMemory(const l_uint8 *data, | |
| 1952 size_t size) | |
| 1953 { | |
| 1954 FILE *fp; | |
| 1955 | |
| 1956 if (!data) | |
| 1957 return (FILE *)ERROR_PTR("data not defined", __func__, NULL); | |
| 1958 | |
| 1959 #if HAVE_FMEMOPEN | |
| 1960 if ((fp = fmemopen((void *)data, size, "rb")) == NULL) | |
| 1961 return (FILE *)ERROR_PTR("stream not opened", __func__, NULL); | |
| 1962 #else /* write to tmp file */ | |
| 1963 L_INFO("no fmemopen API --> work-around: write to temp file\n", __func__); | |
| 1964 #ifdef _WIN32 | |
| 1965 if ((fp = fopenWriteWinTempfile()) == NULL) | |
| 1966 return (FILE *)ERROR_PTR("tmpfile stream not opened", __func__, NULL); | |
| 1967 #else | |
| 1968 if ((fp = tmpfile()) == NULL) | |
| 1969 return (FILE *)ERROR_PTR("tmpfile stream not opened", __func__, NULL); | |
| 1970 #endif /* _WIN32 */ | |
| 1971 fwrite(data, 1, size, fp); | |
| 1972 rewind(fp); | |
| 1973 #endif /* HAVE_FMEMOPEN */ | |
| 1974 | |
| 1975 return fp; | |
| 1976 } | |
| 1977 | |
| 1978 | |
| 1979 /*--------------------------------------------------------------------* | |
| 1980 * Opening a Windows tmpfile for writing * | |
| 1981 *--------------------------------------------------------------------*/ | |
/*!
 * \brief   fopenWriteWinTempfile()
 *
 * \return  file stream, or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) The Windows version of tmpfile() writes into the root
 *          C:\ directory, which requires admin privileges.  This
 *          function provides an alternative implementation.
 *      (2) On platforms where _WIN32 is not defined this always
 *          returns NULL.
 *      (3) The file is opened read/write in binary mode with the
 *          _O_TEMPORARY and _O_SHORT_LIVED flags.
 * </pre>
 */
FILE *
fopenWriteWinTempfile(void)
{
#ifdef _WIN32
    l_int32  handle;
    FILE    *fp;
    char    *filename;

    if ((filename = l_makeTempFilename()) == NULL) {
        L_ERROR("l_makeTempFilename failed, %s\n", __func__, strerror(errno));
        return NULL;
    }

    handle = _open(filename, _O_CREAT | _O_RDWR | _O_SHORT_LIVED |
                   _O_TEMPORARY | _O_BINARY, _S_IREAD | _S_IWRITE);
    lept_free(filename);
    if (handle == -1) {
        L_ERROR("_open failed, %s\n", __func__, strerror(errno));
        return NULL;
    }

    if ((fp = _fdopen(handle, "r+b")) == NULL) {
            /* Report first, so that errno still refers to _fdopen() */
        L_ERROR("_fdopen failed, %s\n", __func__, strerror(errno));
            /* No stream took ownership of the descriptor; release it */
        _close(handle);
        return NULL;
    }

    return fp;
#else
    return NULL;
#endif  /*  _WIN32 */
}
| 2025 | |
| 2026 | |
| 2027 /*--------------------------------------------------------------------* | |
| 2028 * Multi-platform functions that avoid C-runtime boundary * | |
| 2029 * crossing for applications with Windows DLLs * | |
| 2030 *--------------------------------------------------------------------*/ | |
| 2031 /* | |
| 2032 * Problems arise when pointers to streams and data are passed | |
| 2033 * between two Windows DLLs that have been generated with different | |
| 2034 * C runtimes. To avoid this, leptonica provides wrappers for | |
| 2035 * several C library calls. | |
| 2036 */ | |
| 2037 /*! | |
| 2038 * \brief lept_fopen() | |
| 2039 * | |
| 2040 * \param[in] filename | |
| 2041 * \param[in] mode same as for fopen(); e.g., "rb" | |
| 2042 * \return stream or NULL on error | |
| 2043 * | |
| 2044 * <pre> | |
| 2045 * Notes: | |
| 2046 * (1) This must be used by any application that passes | |
| 2047 * a file handle to a leptonica Windows DLL. | |
| 2048 * </pre> | |
| 2049 */ | |
| 2050 FILE * | |
| 2051 lept_fopen(const char *filename, | |
| 2052 const char *mode) | |
| 2053 { | |
| 2054 if (!filename) | |
| 2055 return (FILE *)ERROR_PTR("filename not defined", __func__, NULL); | |
| 2056 if (!mode) | |
| 2057 return (FILE *)ERROR_PTR("mode not defined", __func__, NULL); | |
| 2058 | |
| 2059 if (stringFindSubstr(mode, "r", NULL)) | |
| 2060 return fopenReadStream(filename); | |
| 2061 else | |
| 2062 return fopenWriteStream(filename, mode); | |
| 2063 } | |
| 2064 | |
| 2065 | |
| 2066 /*! | |
| 2067 * \brief lept_fclose() | |
| 2068 * | |
| 2069 * \param[in] fp file stream | |
| 2070 * \return 0 if OK, 1 on error | |
| 2071 * | |
| 2072 * <pre> | |
| 2073 * Notes: | |
| 2074 * (1) This should be used by any application that accepts | |
| 2075 * a file handle generated by a leptonica Windows DLL. | |
| 2076 * </pre> | |
| 2077 */ | |
| 2078 l_ok | |
| 2079 lept_fclose(FILE *fp) | |
| 2080 { | |
| 2081 if (!fp) | |
| 2082 return ERROR_INT("stream not defined", __func__, 1); | |
| 2083 | |
| 2084 return fclose(fp); | |
| 2085 } | |
| 2086 | |
| 2087 | |
/*!
 * \brief   lept_calloc()
 *
 * \param[in]    nmemb    number of members
 * \param[in]    size     of each member
 * \return  void ptr, or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) For safety with Windows DLLs, this can be used in conjunction
 *          with lept_free() to avoid C-runtime boundary problems.
 *          Just use these two functions throughout your application.
 *      (2) A request with zero members or zero member size returns NULL
 *          without calling the allocator.
 * </pre>
 */
void *
lept_calloc(size_t  nmemb,
            size_t  size)
{
        /* Both arguments are unsigned, so "not positive" means zero */
    if (nmemb == 0 || size == 0)
        return NULL;
    return LEPT_CALLOC(nmemb, size);
}
| 2110 | |
| 2111 | |
/*!
 * \brief   lept_free()
 *
 * \param[in]    ptr
 *
 * <pre>
 * Notes:
 *      (1) This should be used by any application that accepts
 *          heap data allocated by a leptonica Windows DLL.
 *      (2) A NULL %ptr is ignored.
 * </pre>
 */
void
lept_free(void *ptr)
{
    if (ptr != NULL) {
        LEPT_FREE(ptr);
    }
}
| 2129 | |
| 2130 | |
| 2131 /*--------------------------------------------------------------------* | |
| 2132 * Multi-platform file system operations * | |
| 2133 * [ These only write to /tmp or its subdirectories ] * | |
| 2134 *--------------------------------------------------------------------*/ | |
| 2135 /*! | |
| 2136 * \brief lept_mkdir() | |
| 2137 * | |
| 2138 * \param[in] subdir of /tmp or its OS specific equivalent | |
| 2139 * \return 0 on success, non-zero on failure | |
| 2140 * | |
| 2141 * <pre> | |
| 2142 * Notes: | |
| 2143 * (1) %subdir is a partial path that can consist of one or more | |
| 2144 * directories. | |
| 2145 * (2) This makes any subdirectories of /tmp that are required. | |
| 2146 * (3) The root temp directory is: | |
| 2147 * /tmp (unix) [default] | |
| 2148 * [Temp] (Windows) | |
| 2149 * </pre> | |
| 2150 */ | |
| 2151 l_int32 | |
| 2152 lept_mkdir(const char *subdir) | |
| 2153 { | |
| 2154 char *dir, *tmpdir; | |
| 2155 l_int32 i, n; | |
| 2156 l_int32 ret = 0; | |
| 2157 SARRAY *sa; | |
| 2158 #ifdef _WIN32 | |
| 2159 l_uint32 attributes; | |
| 2160 #endif /* _WIN32 */ | |
| 2161 | |
| 2162 if (!LeptDebugOK) { | |
| 2163 L_INFO("making named temp subdirectory %s is disabled\n", | |
| 2164 __func__, subdir); | |
| 2165 return 0; | |
| 2166 } | |
| 2167 | |
| 2168 if (!subdir) | |
| 2169 return ERROR_INT("subdir not defined", __func__, 1); | |
| 2170 if ((strlen(subdir) == 0) || (subdir[0] == '.') || (subdir[0] == '/')) | |
| 2171 return ERROR_INT("subdir not an actual subdirectory", __func__, 1); | |
| 2172 | |
| 2173 sa = sarrayCreate(0); | |
| 2174 sarraySplitString(sa, subdir, "/"); | |
| 2175 n = sarrayGetCount(sa); | |
| 2176 dir = genPathname("/tmp", NULL); | |
| 2177 /* Make sure the tmp directory exists */ | |
| 2178 #ifndef _WIN32 | |
| 2179 ret = mkdir(dir, 0777); | |
| 2180 #else | |
| 2181 attributes = GetFileAttributesA(dir); | |
| 2182 if (attributes == INVALID_FILE_ATTRIBUTES) | |
| 2183 ret = (CreateDirectoryA(dir, NULL) ? 0 : 1); | |
| 2184 #endif | |
| 2185 /* Make all the subdirectories */ | |
| 2186 for (i = 0; i < n; i++) { | |
| 2187 tmpdir = pathJoin(dir, sarrayGetString(sa, i, L_NOCOPY)); | |
| 2188 #ifndef _WIN32 | |
| 2189 ret += mkdir(tmpdir, 0777); | |
| 2190 #else | |
| 2191 if (CreateDirectoryA(tmpdir, NULL) == 0) | |
| 2192 ret += (GetLastError() != ERROR_ALREADY_EXISTS); | |
| 2193 #endif | |
| 2194 LEPT_FREE(dir); | |
| 2195 dir = tmpdir; | |
| 2196 } | |
| 2197 LEPT_FREE(dir); | |
| 2198 sarrayDestroy(&sa); | |
| 2199 if (ret > 0) | |
| 2200 L_ERROR("failure to create %d directories\n", __func__, ret); | |
| 2201 return ret; | |
| 2202 } | |
| 2203 | |
| 2204 | |
| 2205 /*! | |
| 2206 * \brief lept_rmdir() | |
| 2207 * | |
| 2208 * \param[in] subdir of /tmp or its OS specific equivalent | |
| 2209 * \return 0 on success, non-zero on failure | |
| 2210 * | |
| 2211 * <pre> | |
| 2212 * Notes: | |
| 2213 * (1) %subdir is a partial path that can consist of one or more | |
| 2214 * directories. | |
| 2215 * (2) This removes all files from the specified subdirectory of | |
| 2216 * the root temp directory: | |
| 2217 * /tmp (unix) | |
| 2218 * [Temp] (Windows) | |
| 2219 * and then removes the subdirectory. | |
| 2220 * (3) The combination | |
| 2221 * lept_rmdir(subdir); | |
| 2222 * lept_mkdir(subdir); | |
| 2223 * is guaranteed to give you an empty subdirectory. | |
| 2224 * </pre> | |
| 2225 */ | |
| 2226 l_int32 | |
| 2227 lept_rmdir(const char *subdir) | |
| 2228 { | |
| 2229 char *dir, *fname, *fullname; | |
| 2230 l_int32 exists, ret, i, nfiles; | |
| 2231 SARRAY *sa; | |
| 2232 #ifdef _WIN32 | |
| 2233 char *newpath; | |
| 2234 #else | |
| 2235 char *realdir; | |
| 2236 #endif /* _WIN32 */ | |
| 2237 | |
| 2238 if (!subdir) | |
| 2239 return ERROR_INT("subdir not defined", __func__, 1); | |
| 2240 if ((strlen(subdir) == 0) || (subdir[0] == '.') || (subdir[0] == '/')) | |
| 2241 return ERROR_INT("subdir not an actual subdirectory", __func__, 1); | |
| 2242 | |
| 2243 /* Find the temp subdirectory */ | |
| 2244 dir = pathJoin("/tmp", subdir); | |
| 2245 if (!dir) | |
| 2246 return ERROR_INT("directory name not made", __func__, 1); | |
| 2247 lept_direxists(dir, &exists); | |
| 2248 if (!exists) { /* fail silently */ | |
| 2249 LEPT_FREE(dir); | |
| 2250 return 0; | |
| 2251 } | |
| 2252 | |
| 2253 /* List all the files in that directory */ | |
| 2254 if ((sa = getFilenamesInDirectory(dir)) == NULL) { | |
| 2255 L_ERROR("directory %s does not exist!\n", __func__, dir); | |
| 2256 LEPT_FREE(dir); | |
| 2257 return 1; | |
| 2258 } | |
| 2259 nfiles = sarrayGetCount(sa); | |
| 2260 | |
| 2261 for (i = 0; i < nfiles; i++) { | |
| 2262 fname = sarrayGetString(sa, i, L_NOCOPY); | |
| 2263 fullname = genPathname(dir, fname); | |
| 2264 remove(fullname); | |
| 2265 LEPT_FREE(fullname); | |
| 2266 } | |
| 2267 | |
| 2268 #ifndef _WIN32 | |
| 2269 realdir = genPathname("/tmp", subdir); | |
| 2270 ret = rmdir(realdir); | |
| 2271 LEPT_FREE(realdir); | |
| 2272 #else | |
| 2273 newpath = genPathname(dir, NULL); | |
| 2274 ret = (RemoveDirectoryA(newpath) ? 0 : 1); | |
| 2275 LEPT_FREE(newpath); | |
| 2276 #endif /* !_WIN32 */ | |
| 2277 | |
| 2278 sarrayDestroy(&sa); | |
| 2279 LEPT_FREE(dir); | |
| 2280 return ret; | |
| 2281 } | |
| 2282 | |
| 2283 | |
| 2284 /*! | |
| 2285 * \brief lept_direxists() | |
| 2286 * | |
| 2287 * \param[in] dir | |
| 2288 * \param[out] pexists 1 if it exists; 0 otherwise | |
| 2289 * \return void | |
| 2290 * | |
| 2291 * <pre> | |
| 2292 * Notes: | |
| 2293 * (1) Always use unix pathname separators. | |
| 2294 * (2) By calling genPathname(), if the pathname begins with "/tmp" | |
| 2295 * this does an automatic directory translation for operating | |
| 2296 * systems that use a different path for /tmp. | |
| 2297 * </pre> | |
| 2298 */ | |
| 2299 void | |
| 2300 lept_direxists(const char *dir, | |
| 2301 l_int32 *pexists) | |
| 2302 { | |
| 2303 char *realdir; | |
| 2304 | |
| 2305 if (!pexists) return; | |
| 2306 *pexists = 0; | |
| 2307 if (!dir) return; | |
| 2308 if ((realdir = genPathname(dir, NULL)) == NULL) | |
| 2309 return; | |
| 2310 | |
| 2311 #ifndef _WIN32 | |
| 2312 { | |
| 2313 struct stat s; | |
| 2314 l_int32 err = stat(realdir, &s); | |
| 2315 if (err != -1 && S_ISDIR(s.st_mode)) | |
| 2316 *pexists = 1; | |
| 2317 } | |
| 2318 #else /* _WIN32 */ | |
| 2319 { | |
| 2320 l_uint32 attributes; | |
| 2321 attributes = GetFileAttributesA(realdir); | |
| 2322 if (attributes != INVALID_FILE_ATTRIBUTES && | |
| 2323 (attributes & FILE_ATTRIBUTE_DIRECTORY)) | |
| 2324 *pexists = 1; | |
| 2325 } | |
| 2326 #endif /* _WIN32 */ | |
| 2327 | |
| 2328 LEPT_FREE(realdir); | |
| 2329 } | |
| 2330 | |
| 2331 | |
| 2332 /*! | |
| 2333 * \brief lept_rm_match() | |
| 2334 * | |
| 2335 * \param[in] subdir [optional] if NULL, the removed files are in /tmp | |
| 2336 * \param[in] substr [optional] pattern to match in filename | |
| 2337 * \return 0 on success, non-zero on failure | |
| 2338 * | |
| 2339 * <pre> | |
| 2340 * Notes: | |
| 2341 * (1) This removes the matched files in /tmp or a subdirectory of /tmp. | |
| 2342 * Use NULL for %subdir if the files are in /tmp. | |
| 2343 * (2) If %substr == NULL, this removes all files in the directory. | |
| 2344 * If %substr == "" (empty), this removes no files. | |
| 2345 * If both %subdir == NULL and %substr == NULL, this removes | |
| 2346 * all files in /tmp. | |
| 2347 * (3) Use unix pathname separators. | |
| 2348 * (4) By calling genPathname(), if the pathname begins with "/tmp" | |
| 2349 * this does an automatic directory translation for operating | |
| 2350 * systems that use a different path for /tmp. | |
| 2351 * (5) Error conditions: | |
| 2352 * * returns -1 if the directory is not found | |
| 2353 * * returns the number of files (> 0) that it was unable to remove. | |
| 2354 * </pre> | |
| 2355 */ | |
| 2356 l_int32 | |
| 2357 lept_rm_match(const char *subdir, | |
| 2358 const char *substr) | |
| 2359 { | |
| 2360 char *path, *fname; | |
| 2361 char tempdir[256]; | |
| 2362 l_int32 i, n, ret; | |
| 2363 SARRAY *sa; | |
| 2364 | |
| 2365 makeTempDirname(tempdir, sizeof(tempdir), subdir); | |
| 2366 if ((sa = getSortedPathnamesInDirectory(tempdir, substr, 0, 0)) == NULL) | |
| 2367 return ERROR_INT("sa not made", __func__, -1); | |
| 2368 n = sarrayGetCount(sa); | |
| 2369 if (n == 0) { | |
| 2370 L_WARNING("no matching files found\n", __func__); | |
| 2371 sarrayDestroy(&sa); | |
| 2372 return 0; | |
| 2373 } | |
| 2374 | |
| 2375 ret = 0; | |
| 2376 for (i = 0; i < n; i++) { | |
| 2377 fname = sarrayGetString(sa, i, L_NOCOPY); | |
| 2378 path = genPathname(fname, NULL); | |
| 2379 if (lept_rmfile(path) != 0) { | |
| 2380 L_ERROR("failed to remove %s\n", __func__, path); | |
| 2381 ret++; | |
| 2382 } | |
| 2383 LEPT_FREE(path); | |
| 2384 } | |
| 2385 sarrayDestroy(&sa); | |
| 2386 return ret; | |
| 2387 } | |
| 2388 | |
| 2389 | |
| 2390 /*! | |
| 2391 * \brief lept_rm() | |
| 2392 * | |
| 2393 * \param[in] subdir [optional] subdir of '/tmp'; can be NULL | |
| 2394 * \param[in] tail filename without the directory | |
| 2395 * \return 0 on success, non-zero on failure | |
| 2396 * | |
| 2397 * <pre> | |
| 2398 * Notes: | |
| 2399 * (1) By calling genPathname(), this does an automatic directory | |
| 2400 * translation on operating systems which use a different path. | |
| 2401 * </pre> | |
| 2402 */ | |
| 2403 l_int32 | |
| 2404 lept_rm(const char *subdir, | |
| 2405 const char *tail) | |
| 2406 { | |
| 2407 char *path; | |
| 2408 char newtemp[256]; | |
| 2409 l_int32 ret; | |
| 2410 | |
| 2411 if (!tail || strlen(tail) == 0) | |
| 2412 return ERROR_INT("tail undefined or empty", __func__, 1); | |
| 2413 | |
| 2414 if (makeTempDirname(newtemp, sizeof(newtemp), subdir)) | |
| 2415 return ERROR_INT("temp dirname not made", __func__, 1); | |
| 2416 path = genPathname(newtemp, tail); | |
| 2417 ret = lept_rmfile(path); | |
| 2418 LEPT_FREE(path); | |
| 2419 return ret; | |
| 2420 } | |
| 2421 | |
| 2422 | |
| 2423 /*! | |
| 2424 * \brief | |
| 2425 * | |
| 2426 * lept_rmfile() | |
| 2427 * | |
| 2428 * \param[in] filepath full path to file including the directory | |
| 2429 * \return 0 on success, non-zero on failure | |
| 2430 * | |
| 2431 * <pre> | |
| 2432 * Notes: | |
| 2433 * (1) This removes the named file. | |
| 2434 * (2) Use unix pathname separators. | |
| 2435 * (3) There is no name translation. | |
| 2436 * (4) Unlike the other lept_* functions in this section, this can remove | |
| 2437 * any file -- it is not restricted to files that are in /tmp or a | |
| 2438 * subdirectory of it. | |
| 2439 * (5) For files in /tmp or a subdirectory of it, this does an automatic | |
| 2440 * directory translation for operating systems that use a different | |
| 2441 * path for /tmp. | |
| 2442 * </pre> | |
| 2443 */ | |
| 2444 l_int32 | |
| 2445 lept_rmfile(const char *filepath) | |
| 2446 { | |
| 2447 l_int32 ret; | |
| 2448 | |
| 2449 if (!filepath || strlen(filepath) == 0) | |
| 2450 return ERROR_INT("filepath undefined or empty", __func__, 1); | |
| 2451 | |
| 2452 #ifndef _WIN32 | |
| 2453 ret = remove(filepath); | |
| 2454 #else | |
| 2455 /* Set attributes to allow deletion of read-only files */ | |
| 2456 SetFileAttributesA(filepath, FILE_ATTRIBUTE_NORMAL); | |
| 2457 ret = DeleteFileA(filepath) ? 0 : 1; | |
| 2458 #endif /* !_WIN32 */ | |
| 2459 | |
| 2460 return ret; | |
| 2461 } | |
| 2462 | |
| 2463 | |
| 2464 /*! | |
| 2465 * \brief lept_mv() | |
| 2466 * | |
| 2467 * \param[in] srcfile | |
| 2468 * \param[in] newdir [optional]; can be NULL | |
| 2469 * \param[in] newtail [optional]; can be NULL | |
| 2470 * \param[out] pnewpath [optional] of actual path; can be NULL | |
| 2471 * \return 0 on success, non-zero on failure | |
| 2472 * | |
| 2473 * <pre> | |
| 2474 * Notes: | |
| 2475 * (1) This moves %srcfile to /tmp or to a subdirectory of /tmp. | |
| 2476 * (2) %srcfile can either be a full path or relative to the | |
| 2477 * current directory. | |
| 2478 * (3) %newdir can either specify an existing subdirectory of /tmp | |
| 2479 * or can be NULL. In the latter case, the file will be written | |
| 2480 * into /tmp. | |
| 2481 * (4) %newtail can either specify a filename tail or, if NULL, | |
| 2482 * the filename is taken from src-tail, the tail of %srcfile. | |
| 2483 * (5) For debugging, the computed newpath can be returned. It must | |
| 2484 * be freed by the caller. | |
| 2485 * (6) Reminders: | |
| 2486 * (a) specify files using unix pathnames | |
| 2487 * (b) this does an automatic directory translation on operating | |
| 2488 * systems that use a different path for /tmp. | |
| 2489 * (7) Examples: | |
| 2490 * * newdir = NULL, newtail = NULL ==> /tmp/src-tail | |
| 2491 * * newdir = NULL, newtail = abc ==> /tmp/abc | |
| 2492 * * newdir = def/ghi, newtail = NULL ==> /tmp/def/ghi/src-tail | |
| 2493 * * newdir = def/ghi, newtail = abc ==> /tmp/def/ghi/abc | |
| 2494 * </pre> | |
| 2495 */ | |
| 2496 l_int32 | |
| 2497 lept_mv(const char *srcfile, | |
| 2498 const char *newdir, | |
| 2499 const char *newtail, | |
| 2500 char **pnewpath) | |
| 2501 { | |
| 2502 char *srcpath, *newpath, *dir, *srctail; | |
| 2503 char newtemp[256]; | |
| 2504 l_int32 ret; | |
| 2505 | |
| 2506 if (!srcfile) | |
| 2507 return ERROR_INT("srcfile not defined", __func__, 1); | |
| 2508 | |
| 2509 /* Require output pathname to be in /tmp/ or a subdirectory */ | |
| 2510 if (makeTempDirname(newtemp, sizeof(newtemp), newdir) == 1) | |
| 2511 return ERROR_INT("newdir not NULL or a subdir of /tmp", __func__, 1); | |
| 2512 | |
| 2513 /* Get canonical src pathname */ | |
| 2514 splitPathAtDirectory(srcfile, &dir, &srctail); | |
| 2515 | |
| 2516 #ifndef _WIN32 | |
| 2517 srcpath = pathJoin(dir, srctail); | |
| 2518 LEPT_FREE(dir); | |
| 2519 | |
| 2520 /* Generate output pathname */ | |
| 2521 if (!newtail || newtail[0] == '\0') | |
| 2522 newpath = pathJoin(newtemp, srctail); | |
| 2523 else | |
| 2524 newpath = pathJoin(newtemp, newtail); | |
| 2525 LEPT_FREE(srctail); | |
| 2526 | |
| 2527 /* Overwrite any existing file at 'newpath' */ | |
| 2528 ret = fileCopy(srcpath, newpath); | |
| 2529 if (!ret) { /* and remove srcfile */ | |
| 2530 char *realpath = genPathname(srcpath, NULL); | |
| 2531 remove(realpath); | |
| 2532 LEPT_FREE(realpath); | |
| 2533 } | |
| 2534 #else | |
| 2535 srcpath = genPathname(dir, srctail); | |
| 2536 LEPT_FREE(dir); | |
| 2537 | |
| 2538 /* Generate output pathname */ | |
| 2539 if (!newtail || newtail[0] == '\0') | |
| 2540 newpath = genPathname(newtemp, srctail); | |
| 2541 else | |
| 2542 newpath = genPathname(newtemp, newtail); | |
| 2543 LEPT_FREE(srctail); | |
| 2544 | |
| 2545 /* Overwrite any existing file at 'newpath' */ | |
| 2546 ret = MoveFileExA(srcpath, newpath, | |
| 2547 MOVEFILE_COPY_ALLOWED | MOVEFILE_REPLACE_EXISTING) ? 0 : 1; | |
| 2548 #endif /* ! _WIN32 */ | |
| 2549 | |
| 2550 LEPT_FREE(srcpath); | |
| 2551 if (pnewpath) | |
| 2552 *pnewpath = newpath; | |
| 2553 else | |
| 2554 LEPT_FREE(newpath); | |
| 2555 return ret; | |
| 2556 } | |
| 2557 | |
| 2558 | |
| 2559 /*! | |
| 2560 * \brief lept_cp() | |
| 2561 * | |
| 2562 * \param[in] srcfile | |
| 2563 * \param[in] newdir [optional]; can be NULL | |
| 2564 * \param[in] newtail [optional]; can be NULL | |
| 2565 * \param[out] pnewpath [optional] of actual path; can be NULL | |
| 2566 * \return 0 on success, non-zero on failure | |
| 2567 * | |
| 2568 * <pre> | |
| 2569 * Notes: | |
| 2570 * (1) This copies %srcfile to /tmp or to a subdirectory of /tmp. | |
| 2571 * (2) %srcfile can either be a full path or relative to the | |
| 2572 * current directory. | |
| 2573 * (3) %newdir can either specify an existing subdirectory of /tmp, | |
| 2574 * or can be NULL. In the latter case, the file will be written | |
| 2575 * into /tmp. | |
| 2576 * (4) %newtail can either specify a filename tail or, if NULL, | |
| 2577 * the filename is taken from src-tail, the tail of %srcfile. | |
| 2578 * (5) For debugging, the computed newpath can be returned. It must | |
| 2579 * be freed by the caller. | |
| 2580 * (6) Reminders: | |
| 2581 * (a) specify files using unix pathnames | |
| 2582 * (b) this does an automatic directory translation for operating | |
| 2583 * systems that use a different path for /tmp | |
| 2584 * (7) Examples: | |
| 2585 * * newdir = NULL, newtail = NULL ==> /tmp/src-tail | |
| 2586 * * newdir = NULL, newtail = abc ==> /tmp/abc | |
| 2587 * * newdir = def/ghi, newtail = NULL ==> /tmp/def/ghi/src-tail | |
| 2588 * * newdir = def/ghi, newtail = abc ==> /tmp/def/ghi/abc | |
| 2589 * | |
| 2590 * </pre> | |
| 2591 */ | |
| 2592 l_int32 | |
| 2593 lept_cp(const char *srcfile, | |
| 2594 const char *newdir, | |
| 2595 const char *newtail, | |
| 2596 char **pnewpath) | |
| 2597 { | |
| 2598 char *srcpath, *newpath, *dir, *srctail; | |
| 2599 char newtemp[256]; | |
| 2600 l_int32 ret; | |
| 2601 | |
| 2602 if (!srcfile) | |
| 2603 return ERROR_INT("srcfile not defined", __func__, 1); | |
| 2604 | |
| 2605 /* Require output pathname to be in /tmp or a subdirectory */ | |
| 2606 if (makeTempDirname(newtemp, sizeof(newtemp), newdir) == 1) | |
| 2607 return ERROR_INT("newdir not NULL or a subdir of /tmp", __func__, 1); | |
| 2608 | |
| 2609 /* Get canonical src pathname */ | |
| 2610 splitPathAtDirectory(srcfile, &dir, &srctail); | |
| 2611 | |
| 2612 #ifndef _WIN32 | |
| 2613 srcpath = pathJoin(dir, srctail); | |
| 2614 LEPT_FREE(dir); | |
| 2615 | |
| 2616 /* Generate output pathname */ | |
| 2617 if (!newtail || newtail[0] == '\0') | |
| 2618 newpath = pathJoin(newtemp, srctail); | |
| 2619 else | |
| 2620 newpath = pathJoin(newtemp, newtail); | |
| 2621 LEPT_FREE(srctail); | |
| 2622 | |
| 2623 /* Overwrite any existing file at 'newpath' */ | |
| 2624 ret = fileCopy(srcpath, newpath); | |
| 2625 #else | |
| 2626 srcpath = genPathname(dir, srctail); | |
| 2627 LEPT_FREE(dir); | |
| 2628 | |
| 2629 /* Generate output pathname */ | |
| 2630 if (!newtail || newtail[0] == '\0') | |
| 2631 newpath = genPathname(newtemp, srctail); | |
| 2632 else | |
| 2633 newpath = genPathname(newtemp, newtail); | |
| 2634 LEPT_FREE(srctail); | |
| 2635 | |
| 2636 /* Overwrite any existing file at 'newpath' */ | |
| 2637 ret = CopyFileA(srcpath, newpath, FALSE) ? 0 : 1; | |
| 2638 #endif /* !_WIN32 */ | |
| 2639 | |
| 2640 LEPT_FREE(srcpath); | |
| 2641 if (pnewpath) | |
| 2642 *pnewpath = newpath; | |
| 2643 else | |
| 2644 LEPT_FREE(newpath); | |
| 2645 return ret; | |
| 2646 } | |
| 2647 | |
| 2648 | |
| 2649 /*--------------------------------------------------------------------* | |
| 2650 * Special debug/test function for calling 'system' * | |
| 2651 *--------------------------------------------------------------------*/ | |
| 2652 #if defined(__APPLE__) | |
| 2653 #include "TargetConditionals.h" | |
| 2654 #endif /* __APPLE__ */ | |
| 2655 | |
| 2656 /*! | |
| 2657 * \brief callSystemDebug() | |
| 2658 * | |
| 2659 * \param[in] cmd command to be exec'd | |
| 2660 * \return 0 on success | |
| 2661 * | |
| 2662 * <pre> | |
| 2663 * Notes: | |
| 2664 * (1) The C library 'system' call is only made through this function. | |
| 2665 * It only works in debug/test mode, where the global variable | |
| 2666 * LeptDebugOK == TRUE. This variable is set to FALSE in the | |
| 2667 * library as distributed, and calling this function will | |
| 2668 * generate an error message. | |
| 2669 * </pre> | |
| 2670 */ | |
| 2671 l_int32 | |
| 2672 callSystemDebug(const char *cmd) | |
| 2673 { | |
| 2674 l_int32 ret; | |
| 2675 | |
| 2676 if (!cmd) { | |
| 2677 L_ERROR("cmd not defined\n", __func__); | |
| 2678 return 1; | |
| 2679 } | |
| 2680 if (LeptDebugOK == FALSE) { | |
| 2681 L_INFO("'system' calls are disabled\n", __func__); | |
| 2682 return 1; | |
| 2683 } | |
| 2684 | |
| 2685 #if defined(__APPLE__) /* iOS 11 does not support system() */ | |
| 2686 | |
| 2687 #if (defined(TARGET_OS_OSX) && TARGET_OS_OSX == 1) /* Mac OS X */ | |
| 2688 ret = system(cmd); | |
| 2689 #elif TARGET_OS_IPHONE || defined(OS_IOS) /* iOS */ | |
| 2690 L_ERROR("iOS 11 does not support system()\n", __func__); | |
| 2691 #endif /* TARGET_OS_OSX */ | |
| 2692 | |
| 2693 #else /* ! __APPLE__ */ | |
| 2694 | |
| 2695 ret = system(cmd); | |
| 2696 | |
| 2697 #endif /* __APPLE__ */ | |
| 2698 | |
| 2699 return ret; | |
| 2700 } | |
| 2701 | |
| 2702 | |
| 2703 /*--------------------------------------------------------------------* | |
| 2704 * General file name operations * | |
| 2705 *--------------------------------------------------------------------*/ | |
| 2706 /*! | |
| 2707 * \brief splitPathAtDirectory() | |
| 2708 * | |
| 2709 * \param[in] pathname full path; can be a directory | |
| 2710 * \param[out] pdir [optional] root directory name of | |
| 2711 * input path, including trailing '/' | |
| 2712 * \param[out] ptail [optional] path tail, which is either | |
| 2713 * the file name within the root directory or | |
| 2714 * the last sub-directory in the path | |
| 2715 * \return 0 if OK, 1 on error | |
| 2716 * | |
| 2717 * <pre> | |
| 2718 * Notes: | |
| 2719 * (1) If you only want the tail, input null for the root directory ptr. | |
| 2720 * (2) If you only want the root directory name, input null for the | |
| 2721 * tail ptr. | |
| 2722 * (3) This function makes decisions based only on the lexical | |
| 2723 * structure of the input. Examples: | |
| 2724 * /usr/tmp/abc.d --> dir: /usr/tmp/ tail: abc.d | |
| 2725 * /usr/tmp/ --> dir: /usr/tmp/ tail: [empty string] | |
| 2726 * /usr/tmp --> dir: /usr/ tail: tmp | |
| 2727 * abc.d --> dir: [empty string] tail: abc.d | |
| 2728 * (4 Consider the first example above: /usr/tmp/abc.d. | |
| 2729 * Suppose you want the stem of the file, abc, without either | |
| 2730 * the directory or the extension. This can be extracted in two steps: | |
| 2731 * splitPathAtDirectory("usr/tmp/abc.d", NULL, &tail); | |
| 2732 * [sets tail: "abc.d"] | |
| 2733 * splitPathAtExtension(tail, &basename, NULL); | |
| 2734 * [sets basename: "abc"] | |
| 2735 * (5) The input can have either forward (unix) or backward (win) | |
| 2736 * slash separators. The output has unix separators. | |
| 2737 * Note that Win32 pathname functions generally accept both | |
| 2738 * slash forms, but the Windows command line interpreter | |
| 2739 * only accepts backward slashes, because forward slashes are | |
| 2740 * used to demarcate switches (vs. dashes in unix). | |
| 2741 * </pre> | |
| 2742 */ | |
| 2743 l_ok | |
| 2744 splitPathAtDirectory(const char *pathname, | |
| 2745 char **pdir, | |
| 2746 char **ptail) | |
| 2747 { | |
| 2748 char *cpathname, *lastslash; | |
| 2749 | |
| 2750 if (!pdir && !ptail) | |
| 2751 return ERROR_INT("null input for both strings", __func__, 1); | |
| 2752 if (pdir) *pdir = NULL; | |
| 2753 if (ptail) *ptail = NULL; | |
| 2754 if (!pathname) | |
| 2755 return ERROR_INT("pathname not defined", __func__, 1); | |
| 2756 | |
| 2757 cpathname = stringNew(pathname); | |
| 2758 convertSepCharsInPath(cpathname, UNIX_PATH_SEPCHAR); | |
| 2759 lastslash = strrchr(cpathname, '/'); | |
| 2760 if (lastslash) { | |
| 2761 if (ptail) | |
| 2762 *ptail = stringNew(lastslash + 1); | |
| 2763 if (pdir) { | |
| 2764 *(lastslash + 1) = '\0'; | |
| 2765 *pdir = cpathname; | |
| 2766 } else { | |
| 2767 LEPT_FREE(cpathname); | |
| 2768 } | |
| 2769 } else { /* no directory */ | |
| 2770 if (pdir) | |
| 2771 *pdir = stringNew(""); | |
| 2772 if (ptail) | |
| 2773 *ptail = cpathname; | |
| 2774 else | |
| 2775 LEPT_FREE(cpathname); | |
| 2776 } | |
| 2777 | |
| 2778 return 0; | |
| 2779 } | |
| 2780 | |
| 2781 | |
| 2782 /*! | |
| 2783 * \brief splitPathAtExtension() | |
| 2784 * | |
| 2785 * \param[in] pathname full path; can be a directory | |
| 2786 * \param[out] pbasename [optional] pathname not including the | |
| 2787 * last dot and characters after that | |
| 2788 * \param[out] pextension [optional] path extension, which is | |
| 2789 * the last dot and the characters after it. If | |
| 2790 * there is no extension, it returns the empty string | |
| 2791 * \return 0 if OK, 1 on error | |
| 2792 * | |
| 2793 * <pre> | |
| 2794 * Notes: | |
| 2795 * (1) If you only want the extension, input null for the basename ptr. | |
| 2796 * (2) If you only want the basename without extension, input null | |
| 2797 * for the extension ptr. | |
| 2798 * (3) This function makes decisions based only on the lexical | |
| 2799 * structure of the input. Examples: | |
| 2800 * /usr/tmp/abc.jpg --> basename: /usr/tmp/abc ext: .jpg | |
| 2801 * /usr/tmp/.jpg --> basename: /usr/tmp/ ext: .jpg | |
| 2802 * /usr/tmp.jpg/ --> basename: /usr/tmp.jpg/ ext: [empty str] | |
| 2803 * ./.jpg --> basename: ./ ext: .jpg | |
| 2804 * (4) The input can have either forward (unix) or backward (win) | |
| 2805 * slash separators. The output has unix separators. | |
| 2806 * (5) Note that basename, as used here, is different from the result | |
| 2807 * of the unix program 'basename'. Here, basename is the entire | |
| 2808 * pathname up to a final extension and its preceding dot. | |
| 2809 * </pre> | |
| 2810 */ | |
| 2811 l_ok | |
| 2812 splitPathAtExtension(const char *pathname, | |
| 2813 char **pbasename, | |
| 2814 char **pextension) | |
| 2815 { | |
| 2816 char *tail, *dir, *lastdot; | |
| 2817 char empty[4] = ""; | |
| 2818 | |
| 2819 if (!pbasename && !pextension) | |
| 2820 return ERROR_INT("null input for both strings", __func__, 1); | |
| 2821 if (pbasename) *pbasename = NULL; | |
| 2822 if (pextension) *pextension = NULL; | |
| 2823 if (!pathname) | |
| 2824 return ERROR_INT("pathname not defined", __func__, 1); | |
| 2825 | |
| 2826 /* Split out the directory first */ | |
| 2827 splitPathAtDirectory(pathname, &dir, &tail); | |
| 2828 | |
| 2829 /* Then look for a "." in the tail part. | |
| 2830 * This way we ignore all "." in the directory. */ | |
| 2831 if ((lastdot = strrchr(tail, '.'))) { | |
| 2832 if (pextension) | |
| 2833 *pextension = stringNew(lastdot); | |
| 2834 if (pbasename) { | |
| 2835 *lastdot = '\0'; | |
| 2836 *pbasename = stringJoin(dir, tail); | |
| 2837 } | |
| 2838 } else { | |
| 2839 if (pextension) | |
| 2840 *pextension = stringNew(empty); | |
| 2841 if (pbasename) | |
| 2842 *pbasename = stringNew(pathname); | |
| 2843 } | |
| 2844 LEPT_FREE(dir); | |
| 2845 LEPT_FREE(tail); | |
| 2846 return 0; | |
| 2847 } | |
| 2848 | |
| 2849 | |
| 2850 /*! | |
| 2851 * \brief pathJoin() | |
| 2852 * | |
| 2853 * \param[in] dir [optional] can be null | |
| 2854 * \param[in] fname [optional] can be null | |
| 2855 * \return specially concatenated path, or NULL on error | |
| 2856 * | |
| 2857 * <pre> | |
| 2858 * Notes: | |
| 2859 * (1) Use unix-style pathname separators ('/'). | |
| 2860 * (2) %fname can be the entire path, or part of the path containing | |
| 2861 * at least one directory, or a tail without a directory, or NULL. | |
| 2862 * (3) It produces a path that strips multiple slashes to a single | |
| 2863 * slash, joins %dir and %fname by a slash, and has no trailing | |
| 2864 * slashes (except in the cases where %dir == "/" and | |
| 2865 * %fname == NULL, or v.v.). | |
| 2866 * (4) If both %dir and %fname are null, produces an empty string. | |
| 2867 * (5) Neither %dir nor %fname can begin with '..'. | |
| 2868 * (6) The result is not canonicalized or tested for correctness: | |
| 2869 * garbage in (e.g., /&%), garbage out. | |
| 2870 * (7) Examples: | |
| 2871 * //tmp// + //abc/ --> /tmp/abc | |
| 2872 * tmp/ + /abc/ --> tmp/abc | |
| 2873 * tmp/ + abc/ --> tmp/abc | |
| 2874 * /tmp/ + /// --> /tmp | |
| 2875 * /tmp/ + NULL --> /tmp | |
| 2876 * // + /abc// --> /abc | |
| 2877 * // + NULL --> / | |
| 2878 * NULL + /abc/def/ --> /abc/def | |
| 2879 * NULL + abc// --> abc | |
| 2880 * NULL + // --> / | |
| 2881 * NULL + NULL --> (empty string) | |
| 2882 * "" + "" --> (empty string) | |
| 2883 * "" + / --> / | |
| 2884 * ".." + /etc/foo --> NULL | |
| 2885 * /tmp + ".." --> NULL | |
| 2886 * </pre> | |
| 2887 */ | |
| 2888 char * | |
| 2889 pathJoin(const char *dir, | |
| 2890 const char *fname) | |
| 2891 { | |
| 2892 const char *slash = "/"; | |
| 2893 char *str, *dest; | |
| 2894 l_int32 i, n1, n2, emptydir; | |
| 2895 size_t size; | |
| 2896 SARRAY *sa1, *sa2; | |
| 2897 L_BYTEA *ba; | |
| 2898 | |
| 2899 if (!dir && !fname) | |
| 2900 return stringNew(""); | |
| 2901 if (dir && strlen(dir) >= 2 && dir[0] == '.' && dir[1] == '.') | |
| 2902 return (char *)ERROR_PTR("dir starts with '..'", __func__, NULL); | |
| 2903 if (fname && strlen(fname) >= 2 && fname[0] == '.' && fname[1] == '.') | |
| 2904 return (char *)ERROR_PTR("fname starts with '..'", __func__, NULL); | |
| 2905 | |
| 2906 sa1 = sarrayCreate(0); | |
| 2907 sa2 = sarrayCreate(0); | |
| 2908 ba = l_byteaCreate(4); | |
| 2909 | |
| 2910 /* Process %dir */ | |
| 2911 if (dir && strlen(dir) > 0) { | |
| 2912 if (dir[0] == '/') | |
| 2913 l_byteaAppendString(ba, slash); | |
| 2914 sarraySplitString(sa1, dir, "/"); /* removes all slashes */ | |
| 2915 n1 = sarrayGetCount(sa1); | |
| 2916 for (i = 0; i < n1; i++) { | |
| 2917 str = sarrayGetString(sa1, i, L_NOCOPY); | |
| 2918 l_byteaAppendString(ba, str); | |
| 2919 l_byteaAppendString(ba, slash); | |
| 2920 } | |
| 2921 } | |
| 2922 | |
| 2923 /* Special case to add leading slash: dir NULL or empty string */ | |
| 2924 emptydir = dir && strlen(dir) == 0; | |
| 2925 if ((!dir || emptydir) && fname && strlen(fname) > 0 && fname[0] == '/') | |
| 2926 l_byteaAppendString(ba, slash); | |
| 2927 | |
| 2928 /* Process %fname */ | |
| 2929 if (fname && strlen(fname) > 0) { | |
| 2930 sarraySplitString(sa2, fname, "/"); | |
| 2931 n2 = sarrayGetCount(sa2); | |
| 2932 for (i = 0; i < n2; i++) { | |
| 2933 str = sarrayGetString(sa2, i, L_NOCOPY); | |
| 2934 l_byteaAppendString(ba, str); | |
| 2935 l_byteaAppendString(ba, slash); | |
| 2936 } | |
| 2937 } | |
| 2938 | |
| 2939 /* Remove trailing slash */ | |
| 2940 dest = (char *)l_byteaCopyData(ba, &size); | |
| 2941 if (size > 1 && dest[size - 1] == '/') | |
| 2942 dest[size - 1] = '\0'; | |
| 2943 | |
| 2944 sarrayDestroy(&sa1); | |
| 2945 sarrayDestroy(&sa2); | |
| 2946 l_byteaDestroy(&ba); | |
| 2947 return dest; | |
| 2948 } | |
| 2949 | |
| 2950 | |
/*!
 * \brief   appendSubdirs()
 *
 * \param[in]    basedir
 * \param[in]    subdirs
 * \return  concatenated full directory path without trailing slash,
 *          or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) Use unix pathname separators
 *      (2) Allocates a new string:  [basedir]/[subdirs]
 *          The caller owns the returned string.
 *      (3) Exactly one '/' is placed between the two parts: one is added
 *          if %basedir does not end with it, and one leading '/' of
 *          %subdirs is skipped.  One trailing '/' of the result is removed.
 *      (4) An empty %basedir is accepted; the result then starts
 *          with '/'.
 * </pre>
 */
char *
appendSubdirs(const char  *basedir,
              const char  *subdirs)
{
    char   *newdir;
    size_t  len1, len2, len3, len4;

    if (!basedir || !subdirs)
        return (char *)ERROR_PTR("basedir and subdirs not both defined",
                                 __func__, NULL);

    len1 = strlen(basedir);
    len2 = strlen(subdirs);
    len3 = len1 + len2 + 8;  /* room for the added '/' and terminator */
    if ((newdir = (char *)LEPT_CALLOC(len3, 1)) == NULL)
        return (char *)ERROR_PTR("newdir not made", __func__, NULL);
    stringCat(newdir, len3, basedir);

        /* Add '/' if necessary.  The length test must come first:
         * with an empty basedir there is no last character to look at. */
    if (len1 == 0 || newdir[len1 - 1] != '/')
        newdir[len1] = '/';

        /* Add subdirs, stripping a leading '/' */
    if (subdirs[0] == '/')
        stringCat(newdir, len3, subdirs + 1);
    else
        stringCat(newdir, len3, subdirs);

        /* Strip a trailing '/' */
    len4 = strlen(newdir);
    if (len4 > 0 && newdir[len4 - 1] == '/')
        newdir[len4 - 1] = '\0';

    return newdir;
}
| 2994 | |
| 2995 | |
| 2996 /*--------------------------------------------------------------------* | |
| 2997 * Special file name operations * | |
| 2998 *--------------------------------------------------------------------*/ | |
| 2999 /*! | |
| 3000 * \brief convertSepCharsInPath() | |
| 3001 * | |
| 3002 * \param[in] path | |
| 3003 * \param[in] type UNIX_PATH_SEPCHAR, WIN_PATH_SEPCHAR | |
| 3004 * \return 0 if OK, 1 on error | |
| 3005 * | |
| 3006 * <pre> | |
| 3007 * Notes: | |
| 3008 * (1) In-place conversion. | |
| 3009 * (2) Type is the resulting type: | |
| 3010 * * UNIX_PATH_SEPCHAR: '\\' ==> '/' | |
| 3011 * * WIN_PATH_SEPCHAR: '/' ==> '\\' | |
| 3012 * (3) Virtually all path operations in leptonica use unix separators. | |
| 3013 * (4) The backslash is a valid character in unix pathnames and should | |
| 3014 * not be converted. Each backslash needs to be escaped with a | |
| 3015 * preceding backslash for the shell, but the actual filename | |
| 3016 * does not include these escape characters. | |
| 3017 * </pre> | |
| 3018 */ | |
| 3019 l_ok | |
| 3020 convertSepCharsInPath(char *path, | |
| 3021 l_int32 type) | |
| 3022 { | |
| 3023 l_int32 i; | |
| 3024 size_t len; | |
| 3025 | |
| 3026 if (!path) | |
| 3027 return ERROR_INT("path not defined", __func__, 1); | |
| 3028 if (type != UNIX_PATH_SEPCHAR && type != WIN_PATH_SEPCHAR) | |
| 3029 return ERROR_INT("invalid type", __func__, 1); | |
| 3030 | |
| 3031 len = strlen(path); | |
| 3032 if (type == UNIX_PATH_SEPCHAR) { | |
| 3033 #ifdef _WIN32 /* only convert on Windows */ | |
| 3034 for (i = 0; i < len; i++) { | |
| 3035 if (path[i] == '\\') | |
| 3036 path[i] = '/'; | |
| 3037 } | |
| 3038 #endif /* _WIN32 */ | |
| 3039 } else { /* WIN_PATH_SEPCHAR */ | |
| 3040 for (i = 0; i < len; i++) { | |
| 3041 if (path[i] == '/') | |
| 3042 path[i] = '\\'; | |
| 3043 } | |
| 3044 } | |
| 3045 return 0; | |
| 3046 } | |
| 3047 | |
| 3048 | |
| 3049 /*! | |
| 3050 * \brief genPathname() | |
| 3051 * | |
| 3052 * \param[in] dir [optional] directory or full path name, | |
| 3053 * with or without the trailing '/' | |
| 3054 * \param[in] fname [optional] file name within a directory | |
| 3055 * \return pathname either a directory or full path, or NULL on error | |
| 3056 * | |
| 3057 * <pre> | |
| 3058 * Notes: | |
| 3059 * (1) This function generates actual paths in the following ways: | |
| 3060 * * from two sub-parts (e.g., a directory and a file name). | |
| 3061 * * from a single path full path, placed in %dir, with | |
| 3062 * %fname == NULL. | |
| 3063 * * from the name of a file in the local directory placed in | |
| 3064 * %fname, with %dir == NULL. | |
| 3065 * * if in a "/tmp" directory and on iOS, macOS or Windows, | |
| 3066 * the OS specific temp directory is used. | |
| 3067 * (2) This does an automatic directory translation for operating | |
| 3068 * systems that use a different path for /tmp. | |
| 3069 * That path is determined | |
| 3070 * * on Windows: by GetTempPath() | |
| 3071 * * on macOS, iOS: by confstr() (see man page) | |
| 3072 * (3) On unix, the TMPDIR variable is ignored. No rewriting | |
| 3073 * of temp directories is permitted. | |
| 3074 * (4) There are four cases for the input: | |
| 3075 * (a) %dir is a directory and %fname is defined: result is a | |
| 3076 * full path | |
| 3077 * (b) %dir is a directory and %fname is null: result is a directory | |
| 3078 * (c) %dir is a full path and %fname is null: result is a full path | |
| 3079 * (d) %dir is null or an empty string: start in the current dir; | |
| 3080 * result is a full path | |
| 3081 * (5) In all cases, the resulting pathname is not terminated with a slash | |
| 3082 * (6) The caller is responsible for freeing the returned pathname. | |
| 3083 * </pre> | |
| 3084 */ | |
| 3085 char * | |
| 3086 genPathname(const char *dir, | |
| 3087 const char *fname) | |
| 3088 { | |
| 3089 #if defined(REWRITE_TMP) | |
| 3090 l_int32 rewrite_tmp = TRUE; | |
| 3091 #else | |
| 3092 l_int32 rewrite_tmp = FALSE; | |
| 3093 #endif /* REWRITE_TMP */ | |
| 3094 char *cdir, *pathout; | |
| 3095 l_int32 dirlen, namelen; | |
| 3096 size_t size; | |
| 3097 | |
| 3098 if (!dir && !fname) | |
| 3099 return (char *)ERROR_PTR("no input", __func__, NULL); | |
| 3100 | |
| 3101 /* Handle the case where we start from the current directory */ | |
| 3102 if (!dir || dir[0] == '\0') { | |
| 3103 if ((cdir = getcwd(NULL, 0)) == NULL) | |
| 3104 return (char *)ERROR_PTR("no current dir found", __func__, NULL); | |
| 3105 } else { | |
| 3106 if ((cdir = stringNew(dir)) == NULL) | |
| 3107 return (char *)ERROR_PTR("stringNew failed", __func__, NULL); | |
| 3108 } | |
| 3109 | |
| 3110 /* Convert to unix path separators, and remove the trailing | |
| 3111 * slash in the directory, except when dir == "/" */ | |
| 3112 convertSepCharsInPath(cdir, UNIX_PATH_SEPCHAR); | |
| 3113 dirlen = strlen(cdir); | |
| 3114 if (cdir[dirlen - 1] == '/' && dirlen != 1) { | |
| 3115 cdir[dirlen - 1] = '\0'; | |
| 3116 dirlen--; | |
| 3117 } | |
| 3118 | |
| 3119 namelen = (fname) ? strlen(fname) : 0; | |
| 3120 size = dirlen + namelen + 256; | |
| 3121 if ((pathout = (char *)LEPT_CALLOC(size, sizeof(char))) == NULL) { | |
| 3122 LEPT_FREE(cdir); | |
| 3123 return (char *)ERROR_PTR("pathout not made", __func__, NULL); | |
| 3124 } | |
| 3125 | |
| 3126 /* First handle %dir (which may be a full pathname). | |
| 3127 * There is no path rewriting on unix, and on win32, we do not | |
| 3128 * rewrite unless the specified directory is /tmp or | |
| 3129 * a subdirectory of /tmp */ | |
| 3130 if (!rewrite_tmp || dirlen < 4 || | |
| 3131 (dirlen == 4 && strncmp(cdir, "/tmp", 4) != 0) || /* not in "/tmp" */ | |
| 3132 (dirlen > 4 && strncmp(cdir, "/tmp/", 5) != 0)) { /* not in "/tmp/" */ | |
| 3133 stringCopy(pathout, cdir, dirlen); | |
| 3134 } else { /* Rewrite with "/tmp" specified for the directory. */ | |
| 3135 #if defined(__APPLE__) | |
| 3136 size_t n = confstr(_CS_DARWIN_USER_TEMP_DIR, pathout, size); | |
| 3137 if (n == 0 || n > size) { | |
| 3138 /* Fall back to using /tmp */ | |
| 3139 stringCopy(pathout, cdir, dirlen); | |
| 3140 } else { | |
| 3141 /* Add the rest of cdir */ | |
| 3142 if (dirlen > 4) | |
| 3143 stringCat(pathout, size, cdir + 4); | |
| 3144 } | |
| 3145 #elif defined(_WIN32) | |
| 3146 l_int32 tmpdirlen; | |
| 3147 char tmpdir[MAX_PATH]; | |
| 3148 GetTempPathA(sizeof(tmpdir), tmpdir); /* get the Windows temp dir */ | |
| 3149 tmpdirlen = strlen(tmpdir); | |
| 3150 if (tmpdirlen > 0 && tmpdir[tmpdirlen - 1] == '\\') { | |
| 3151 tmpdir[tmpdirlen - 1] = '\0'; /* trim the trailing '\' */ | |
| 3152 } | |
| 3153 tmpdirlen = strlen(tmpdir); | |
| 3154 stringCopy(pathout, tmpdir, tmpdirlen); | |
| 3155 | |
| 3156 /* Add the rest of cdir */ | |
| 3157 if (dirlen > 4) | |
| 3158 stringCat(pathout, size, cdir + 4); | |
| 3159 #endif /* _WIN32 */ | |
| 3160 } | |
| 3161 | |
| 3162 /* Now handle %fname */ | |
| 3163 if (fname && strlen(fname) > 0) { | |
| 3164 dirlen = strlen(pathout); | |
| 3165 pathout[dirlen] = '/'; | |
| 3166 stringCat(pathout, size, fname); | |
| 3167 } | |
| 3168 | |
| 3169 LEPT_FREE(cdir); | |
| 3170 return pathout; | |
| 3171 } | |
| 3172 | |
| 3173 | |
| 3174 /*! | |
| 3175 * \brief makeTempDirname() | |
| 3176 * | |
| 3177 * \param[in] result preallocated on stack or heap and passed in | |
| 3178 * \param[in] nbytes size of %result array, in bytes | |
| 3179 * \param[in] subdir [optional]; can be NULL or an empty string | |
| 3180 * \return 0 if OK, 1 on error | |
| 3181 * | |
| 3182 * <pre> | |
| 3183 * Notes: | |
| 3184 * (1) This generates the directory path for output temp files, | |
| 3185 * written into %result with unix separators. | |
| 3186 * (2) Caller allocates %result, large enough to hold the path, | |
| 3187 * which is: | |
| 3188 * /tmp/%subdir (unix) | |
| 3189 * [Temp]/%subdir (Windows, macOS, iOS) | |
| 3190 * where [Temp] is the OS path | |
| 3191 * and %subdir is in general a set of nested subdirectories: | |
| 3192 * dir1/dir2/.../dirN | |
| 3193 * which in use would not typically exceed 2 levels. | |
| 3194 * (3) Usage example: | |
| 3195 * \code | |
| 3196 * char result[256]; | |
| 3197 * makeTempDirname(result, sizeof(result), "lept/golden"); | |
| 3198 * \endcode | |
| 3199 * </pre> | |
| 3200 */ | |
| 3201 l_ok | |
| 3202 makeTempDirname(char *result, | |
| 3203 size_t nbytes, | |
| 3204 const char *subdir) | |
| 3205 { | |
| 3206 char *dir, *path; | |
| 3207 l_int32 ret = 0; | |
| 3208 size_t pathlen; | |
| 3209 | |
| 3210 if (!result) | |
| 3211 return ERROR_INT("result not defined", __func__, 1); | |
| 3212 if (subdir && ((subdir[0] == '.') || (subdir[0] == '/'))) | |
| 3213 return ERROR_INT("subdir not an actual subdirectory", __func__, 1); | |
| 3214 | |
| 3215 memset(result, 0, nbytes); | |
| 3216 | |
| 3217 dir = pathJoin("/tmp", subdir); | |
| 3218 | |
| 3219 #if defined(REWRITE_TMP) | |
| 3220 path = genPathname(dir, NULL); | |
| 3221 #else | |
| 3222 path = stringNew(dir); | |
| 3223 #endif /* ~ _WIN32 */ | |
| 3224 pathlen = strlen(path); | |
| 3225 if (pathlen < nbytes - 1) { | |
| 3226 stringCopy(result, path, nbytes); | |
| 3227 } else { | |
| 3228 L_ERROR("result array too small for path\n", __func__); | |
| 3229 ret = 1; | |
| 3230 } | |
| 3231 | |
| 3232 LEPT_FREE(dir); | |
| 3233 LEPT_FREE(path); | |
| 3234 return ret; | |
| 3235 } | |
| 3236 | |
| 3237 | |
| 3238 /*! | |
| 3239 * \brief modifyTrailingSlash() | |
| 3240 * | |
| 3241 * \param[in] path preallocated on stack or heap and passed in | |
| 3242 * \param[in] nbytes size of %path array, in bytes | |
| 3243 * \param[in] flag L_ADD_TRAIL_SLASH or L_REMOVE_TRAIL_SLASH | |
| 3244 * \return 0 if OK, 1 on error | |
| 3245 * | |
| 3246 * <pre> | |
| 3247 * Notes: | |
| 3248 * (1) This carries out the requested action if necessary. | |
| 3249 * </pre> | |
| 3250 */ | |
| 3251 l_ok | |
| 3252 modifyTrailingSlash(char *path, | |
| 3253 size_t nbytes, | |
| 3254 l_int32 flag) | |
| 3255 { | |
| 3256 char lastchar; | |
| 3257 size_t len; | |
| 3258 | |
| 3259 if (!path) | |
| 3260 return ERROR_INT("path not defined", __func__, 1); | |
| 3261 if (flag != L_ADD_TRAIL_SLASH && flag != L_REMOVE_TRAIL_SLASH) | |
| 3262 return ERROR_INT("invalid flag", __func__, 1); | |
| 3263 | |
| 3264 len = strlen(path); | |
| 3265 lastchar = path[len - 1]; | |
| 3266 if (flag == L_ADD_TRAIL_SLASH && lastchar != '/' && len < nbytes - 2) { | |
| 3267 path[len] = '/'; | |
| 3268 path[len + 1] = '\0'; | |
| 3269 } else if (flag == L_REMOVE_TRAIL_SLASH && lastchar == '/') { | |
| 3270 path[len - 1] = '\0'; | |
| 3271 } | |
| 3272 return 0; | |
| 3273 } | |
| 3274 | |
| 3275 | |
| 3276 /*! | |
| 3277 * \brief l_makeTempFilename() | |
| 3278 * | |
| 3279 * \return fname : heap allocated filename; returns NULL on failure. | |
| 3280 * | |
| 3281 * <pre> | |
| 3282 * Notes: | |
| 3283 * (1) On unix, this makes a filename of the form | |
| 3284 * "/tmp/lept.XXXXXX", | |
| 3285 * where each X is a random character. | |
| 3286 * (2) On Windows, this makes a filename of the form | |
| 3287 * "/[Temp]/lp.XXXXXX". | |
| 3288 * (3) On all systems, this fails if the file is not writable. | |
| 3289 * (4) Safest usage is to write to a subdirectory in debug code. | |
| 3290 * (5) The returned filename must be freed by the caller, using lept_free. | |
| 3291 * (6) The tail of the filename has a '.', so that cygwin interprets | |
| 3292 * the file as having an extension. Otherwise, cygwin assumes it | |
| 3293 * is an executable and appends ".exe" to the filename. | |
| 3294 * (7) On unix, whenever possible use tmpfile() instead. tmpfile() | |
| 3295 * hides the file name, returns a stream opened for write, | |
| 3296 * and deletes the temp file when the stream is closed. | |
| 3297 * </pre> | |
| 3298 */ | |
| 3299 char * | |
| 3300 l_makeTempFilename(void) | |
| 3301 { | |
| 3302 char dirname[240]; | |
| 3303 | |
| 3304 if (makeTempDirname(dirname, sizeof(dirname), NULL) == 1) | |
| 3305 return (char *)ERROR_PTR("failed to make dirname", __func__, NULL); | |
| 3306 | |
| 3307 #ifndef _WIN32 | |
| 3308 { | |
| 3309 char *pattern; | |
| 3310 l_int32 fd; | |
| 3311 pattern = stringConcatNew(dirname, "/lept.XXXXXX", NULL); | |
| 3312 fd = mkstemp(pattern); | |
| 3313 if (fd == -1) { | |
| 3314 LEPT_FREE(pattern); | |
| 3315 return (char *)ERROR_PTR("mkstemp failed", __func__, NULL); | |
| 3316 } | |
| 3317 close(fd); | |
| 3318 return pattern; | |
| 3319 } | |
| 3320 #else | |
| 3321 { | |
| 3322 char fname[MAX_PATH]; | |
| 3323 FILE *fp; | |
| 3324 if (GetTempFileNameA(dirname, "lp.", 0, fname) == 0) | |
| 3325 return (char *)ERROR_PTR("GetTempFileName failed", __func__, NULL); | |
| 3326 if ((fp = fopen(fname, "wb")) == NULL) | |
| 3327 return (char *)ERROR_PTR("file cannot be written to", __func__, NULL); | |
| 3328 fclose(fp); | |
| 3329 return stringNew(fname); | |
| 3330 } | |
| 3331 #endif /* ~ _WIN32 */ | |
| 3332 } | |
| 3333 | |
| 3334 | |
| 3335 /*! | |
| 3336 * \brief extractNumberFromFilename() | |
| 3337 * | |
| 3338 * \param[in] fname | |
| 3339 * \param[in] numpre number of characters before the digits to be found | |
| 3340 * \param[in] numpost number of characters after the digits to be found | |
| 3341 * \return num number embedded in the filename; -1 on error or if | |
| 3342 * not found | |
| 3343 * | |
| 3344 * <pre> | |
| 3345 * Notes: | |
| 3346 * (1) The number is to be found in the basename, which is the | |
| 3347 * filename without either the directory or the last extension. | |
| 3348 * (2) When a number is found, it is non-negative. If no number | |
| 3349 * is found, this returns -1, without an error message. The | |
| 3350 * caller needs to check. | |
| 3351 * </pre> | |
| 3352 */ | |
| 3353 l_int32 | |
| 3354 extractNumberFromFilename(const char *fname, | |
| 3355 l_int32 numpre, | |
| 3356 l_int32 numpost) | |
| 3357 { | |
| 3358 char *tail, *basename; | |
| 3359 l_int32 len, nret, num; | |
| 3360 | |
| 3361 if (!fname) | |
| 3362 return ERROR_INT("fname not defined", __func__, -1); | |
| 3363 | |
| 3364 splitPathAtDirectory(fname, NULL, &tail); | |
| 3365 splitPathAtExtension(tail, &basename, NULL); | |
| 3366 LEPT_FREE(tail); | |
| 3367 | |
| 3368 len = strlen(basename); | |
| 3369 if (numpre + numpost > len - 1) { | |
| 3370 LEPT_FREE(basename); | |
| 3371 return ERROR_INT("numpre + numpost too big", __func__, -1); | |
| 3372 } | |
| 3373 | |
| 3374 basename[len - numpost] = '\0'; | |
| 3375 nret = sscanf(basename + numpre, "%d", &num); | |
| 3376 LEPT_FREE(basename); | |
| 3377 | |
| 3378 if (nret == 1) | |
| 3379 return num; | |
| 3380 else | |
| 3381 return -1; /* not found */ | |
| 3382 } |
