Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/include/mupdf/fitz/string-util.h @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mupdf-source/include/mupdf/fitz/string-util.h Mon Sep 15 11:43:07 2025 +0200 @@ -0,0 +1,335 @@ +// Copyright (C) 2004-2025 Artifex Software, Inc. +// +// This file is part of MuPDF. +// +// MuPDF is free software: you can redistribute it and/or modify it under the +// terms of the GNU Affero General Public License as published by the Free +// Software Foundation, either version 3 of the License, or (at your option) +// any later version. +// +// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more +// details. +// +// You should have received a copy of the GNU Affero General Public License +// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> +// +// Alternative licensing terms are available from the licensor. +// For commercial licensing, see <https://www.artifex.com/> or contact +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. + +#ifndef MUPDF_FITZ_STRING_H +#define MUPDF_FITZ_STRING_H + +#include "mupdf/fitz/system.h" +#include "mupdf/fitz/context.h" + +/* The Unicode character used to replace an incoming character whose value is + * unknown or unrepresentable. */ +#define FZ_REPLACEMENT_CHARACTER 0xFFFD + +/** + Safe string functions +*/ + +/** + Return strlen(s), if that is less than maxlen, or maxlen if + there is no null byte ('\0') among the first maxlen bytes. +*/ +size_t fz_strnlen(const char *s, size_t maxlen); + +/** + Given a pointer to a C string (or a pointer to NULL) break + it at the first occurrence of a delimiter char (from a given + set). + + stringp: Pointer to a C string pointer (or NULL). Updated on + exit to point to the first char of the string after the + delimiter that was found. 
The string pointed to by stringp will + be corrupted by this call (as the found delimiter will be + overwritten by 0). + + delim: A C string of acceptable delimiter characters. + + Returns a pointer to a C string containing the chars of stringp + up to the first delimiter char (or the end of the string), or + NULL. +*/ +char *fz_strsep(char **stringp, const char *delim); + +/** + Copy at most n-1 chars of a string into a destination + buffer with null termination, returning the real length of the + initial string (excluding terminator). + + dst: Destination buffer, at least n bytes long. + + src: C string (non-NULL). + + n: Size of dst buffer in bytes. + + Returns the length (excluding terminator) of src. +*/ +size_t fz_strlcpy(char *dst, const char *src, size_t n); + +/** + Concatenate 2 strings, with a maximum length. + + dst: pointer to first string in a buffer of n bytes. + + src: pointer to string to concatenate. + + n: Size (in bytes) of buffer that dst is in. + + Returns the real length that a concatenated dst + src would have + been (not including terminator). +*/ +size_t fz_strlcat(char *dst, const char *src, size_t n); + +/** + Safe strstr function. + + haystack: Where to look (may be NULL). + + needle: What to look for. + + Returns NULL if unmatched, or pointer to start of match. +*/ +const char *fz_strstr(const char *haystack, const char *needle); + +/** + Safe case-insensitive strstr function. (Accepts UTF-8). + + haystack: Where to look (may be NULL). + + needle: What to look for. + + Returns NULL if unmatched, or pointer to start of match. +*/ +const char *fz_strstrcase(const char *haystack, const char *needle); + +/** + Find the start of the first occurrence of the substring needle in haystack. +*/ +void *fz_memmem(const void *haystack, size_t haystacklen, const void *needle, size_t needlelen); + +/** + extract the directory component from a path. 
+*/ +void fz_dirname(char *dir, const char *path, size_t dirsize); + +/** + Find the filename component in a path. +*/ +const char *fz_basename(const char *path); + +/** + portable strverscmp(3) function +*/ +int fz_strverscmp(const char *s1, const char *s2); + +/** + Like fz_decode_uri_component but in-place. +*/ +char *fz_urldecode(char *url); + +/** + * Return a new string representing the unencoded version of the given URI. + * This decodes all escape sequences except those that would result in a reserved + * character that is part of the URI syntax (; / ? : @ & = + $ , #). + */ +char *fz_decode_uri(fz_context *ctx, const char *s); + +/** + * Return a new string representing the unencoded version of the given URI component. + * This decodes all escape sequences! + */ +char *fz_decode_uri_component(fz_context *ctx, const char *s); + +/** + * Return a new string representing the provided string encoded as a URI. + */ +char *fz_encode_uri(fz_context *ctx, const char *s); + +/** + * Return a new string representing the provided string encoded as a URI component. + * This also encodes the special reserved characters (; / ? : @ & = + $ , #). + */ +char *fz_encode_uri_component(fz_context *ctx, const char *s); + +/** + * Return a new string representing the provided string encoded as a URI path name. + * This also encodes the special reserved characters except /. + */ +char *fz_encode_uri_pathname(fz_context *ctx, const char *s); + +/** + create output file name using a template. + + If the path contains %[0-9]*d, the first such pattern will be + replaced with the page number. If the template does not contain + such a pattern, the page number will be inserted before the + filename extension. If the template does not have a filename + extension, the page number will be added to the end. +*/ +void fz_format_output_path(fz_context *ctx, char *path, size_t size, const char *fmt, int page); + +/** + rewrite path to the shortest string that names the same path. 
+ + Eliminates multiple and trailing slashes, interprets "." and + "..". Overwrites the string in place. +*/ +char *fz_cleanname(char *name); + +/** + rewrite path to the shortest string that names the same path. + + Eliminates multiple and trailing slashes, interprets "." and + "..". Allocates a new string that the caller must free. +*/ +char *fz_cleanname_strdup(fz_context *ctx, const char *name); + +/** + Resolve a path to an absolute file name. + The resolved path buffer must be of at least PATH_MAX size. +*/ +char *fz_realpath(const char *path, char *resolved_path); + +/** + Case insensitive (UTF8) string comparison. +*/ +int fz_strcasecmp(const char *a, const char *b); + +/** + Case insensitive (UTF8) string comparison. + + n = maximum number of bytes to read from either a or b. +*/ +int fz_strncasecmp(const char *a, const char *b, size_t n); + +/** + FZ_UTFMAX: Maximum number of bytes in a decoded rune (maximum + length returned by fz_chartorune). +*/ +enum { FZ_UTFMAX = 4 }; + +/** + UTF8 decode a single rune from a sequence of chars. + + rune: Pointer to an int to assign the decoded 'rune' to. + (0xFFFD on error). + + str: Pointer to a UTF8 encoded string. + + Returns the number of bytes consumed. +*/ +int fz_chartorune(int *rune, const char *str); + +/** + UTF8 decode a single rune from a sequence of chars + of given length. + + rune: Pointer to an int to assign the decoded 'rune' to. + (0xFFFD on error). + + str: Pointer to a UTF8 encoded string. + + n: The number of bytes available at str. + + Returns the number of bytes consumed. +*/ +int fz_chartorunen(int *rune, const char *str, size_t n); + +/** + UTF8 encode a rune to a sequence of chars. + + str: Pointer to a place to put the UTF8 encoded character. + + rune: The rune to encode. + + Returns the number of bytes the rune took to output. +*/ +int fz_runetochar(char *str, int rune); + +/** + Count how many chars are required to represent a rune. + + rune: The rune to encode. 
+ + Returns the number of bytes required to represent this rune in + UTF8. +*/ +int fz_runelen(int rune); + +/** + Compute the index of a rune in a string. + + str: Pointer to beginning of a string. + + p: Pointer to a char in str. + + Returns the index of the rune pointed to by p in str. +*/ +int fz_runeidx(const char *str, const char *p); + +/** + Obtain a pointer to the char representing the rune + at a given index. + + str: Pointer to beginning of a string. + + idx: Index of a rune to return a char pointer to. + + Returns a pointer to the char where the desired rune starts, + or NULL if the string ends before the index is reached. +*/ +const char *fz_runeptr(const char *str, int idx); + +/** + Count how many runes the UTF-8 encoded string + consists of. + + s: The UTF-8 encoded, NUL-terminated text string. + + Returns the number of runes in the string. +*/ +int fz_utflen(const char *s); + +/* + Convert a wchar string into a new heap allocated utf8 one. +*/ +char *fz_utf8_from_wchar(fz_context *ctx, const wchar_t *s); + +/* + Convert a utf8 string into a new heap allocated wchar one. +*/ +wchar_t *fz_wchar_from_utf8(fz_context *ctx, const char *path); + + +/** + Locale-independent decimal to binary conversion. On overflow + return (-)INFINITY and set errno to ERANGE. On underflow return + 0 and set errno to ERANGE. Special inputs (case insensitive): + "NAN", "INF" or "INFINITY". +*/ +float fz_strtof(const char *s, char **es); + +int fz_grisu(float f, char *s, int *exp); + +/** + Check and parse string into page ranges: + /,?(-?\d+|N)(-(-?\d+|N))?/ +*/ +int fz_is_page_range(fz_context *ctx, const char *s); +const char *fz_parse_page_range(fz_context *ctx, const char *s, int *a, int *b, int n); + +/** + Unicode aware tolower and toupper functions. +*/ +int fz_tolower(int c); +int fz_toupper(int c); + +#endif
