Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/include/mupdf/fitz/string-util.h @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 // Copyright (C) 2004-2025 Artifex Software, Inc. | |
| 2 // | |
| 3 // This file is part of MuPDF. | |
| 4 // | |
| 5 // MuPDF is free software: you can redistribute it and/or modify it under the | |
| 6 // terms of the GNU Affero General Public License as published by the Free | |
| 7 // Software Foundation, either version 3 of the License, or (at your option) | |
| 8 // any later version. | |
| 9 // | |
| 10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY | |
| 11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
| 12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more | |
| 13 // details. | |
| 14 // | |
| 15 // You should have received a copy of the GNU Affero General Public License | |
| 16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> | |
| 17 // | |
| 18 // Alternative licensing terms are available from the licensor. | |
| 19 // For commercial licensing, see <https://www.artifex.com/> or contact | |
| 20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, | |
| 21 // CA 94129, USA, for further information. | |
| 22 | |
| 23 #ifndef MUPDF_FITZ_STRING_H | |
| 24 #define MUPDF_FITZ_STRING_H | |
| 25 | |
| 26 #include "mupdf/fitz/system.h" | |
| 27 #include "mupdf/fitz/context.h" | |
| 28 | |
| 29 /* The Unicode character used to replace an incoming character whose value is | |
| 30 * unknown or unrepresentable. */ | |
| 31 #define FZ_REPLACEMENT_CHARACTER 0xFFFD | |
| 32 | |
| 33 /** | |
| 34 Safe string functions | |
| 35 */ | |
| 36 | |
| 37 /** | |
| 38 Return strlen(s), if that is less than maxlen, or maxlen if | |
| 39 there is no null byte ('\0') among the first maxlen bytes. | |
| 40 */ | |
| 41 size_t fz_strnlen(const char *s, size_t maxlen); | |
| 42 | |
| 43 /** | |
| 44 Given a pointer to a C string (or a pointer to NULL) break | |
| 45 it at the first occurrence of a delimiter char (from a given | |
| 46 set). | |
| 47 | |
| 48 stringp: Pointer to a C string pointer (or NULL). Updated on | |
| 49 exit to point to the first char of the string after the | |
| 50 delimiter that was found. The string pointed to by stringp will | |
| 51 be corrupted by this call (as the found delimiter will be | |
| 52 overwritten by 0). | |
| 53 | |
| 54 delim: A C string of acceptable delimiter characters. | |
| 55 | |
| 56 Returns a pointer to a C string containing the chars of stringp | |
| 57 up to the first delimiter char (or the end of the string), or | |
| 58 NULL. | |
| 59 */ | |
| 60 char *fz_strsep(char **stringp, const char *delim); | |
| 61 | |
| 62 /** | |
| 63 Copy at most n-1 chars of a string into a destination | |
| 64 buffer with null termination, returning the real length of the | |
| 65 initial string (excluding terminator). | |
| 66 | |
| 67 dst: Destination buffer, at least n bytes long. | |
| 68 | |
| 69 src: C string (non-NULL). | |
| 70 | |
| 71 n: Size of dst buffer in bytes. | |
| 72 | |
| 73 Returns the length (excluding terminator) of src. | |
| 74 */ | |
| 75 size_t fz_strlcpy(char *dst, const char *src, size_t n); | |
| 76 | |
| 77 /** | |
| 78 Concatenate 2 strings, with a maximum length. | |
| 79 | |
| 80 dst: pointer to first string in a buffer of n bytes. | |
| 81 | |
| 82 src: pointer to string to concatenate. | |
| 83 | |
| 84 n: Size (in bytes) of buffer that dst is in. | |
| 85 | |
| 86 Returns the real length that a concatenated dst + src would have | |
| 87 been (not including terminator). | |
| 88 */ | |
| 89 size_t fz_strlcat(char *dst, const char *src, size_t n); | |
| 90 | |
| 91 /** | |
| 92 Safe strstr function. | |
| 93 | |
| 94 haystack: Where to look (may be NULL). | |
| 95 | |
| 96 needle: What to look for. | |
| 97 | |
| 98 Returns NULL if unmatched, or pointer to start of match. | |
| 99 */ | |
| 100 const char *fz_strstr(const char *haystack, const char *needle); | |
| 101 | |
| 102 /** | |
| 103 Safe case-insensitive strstr function. (Accepts UTF-8). | |
| 104 | |
| 105 haystack: Where to look (may be NULL). | |
| 106 | |
| 107 needle: What to look for. | |
| 108 | |
| 109 Returns NULL if unmatched, or pointer to start of match. | |
| 110 */ | |
| 111 const char *fz_strstrcase(const char *haystack, const char *needle); | |
| 112 | |
| 113 /** | |
| 114 Find the start of the first occurrence of the substring needle in haystack. | |
| 115 */ | |
| 116 void *fz_memmem(const void *haystack, size_t haystacklen, const void *needle, size_t needlelen); | |
| 117 | |
| 118 /** | |
| 119 extract the directory component from a path. | |
| 120 */ | |
| 121 void fz_dirname(char *dir, const char *path, size_t dirsize); | |
| 122 | |
| 123 /** | |
| 124 Find the filename component in a path. | |
| 125 */ | |
| 126 const char *fz_basename(const char *path); | |
| 127 | |
| 128 /** | |
| 129 portable strverscmp(3) function | |
| 130 */ | |
| 131 int fz_strverscmp(const char *s1, const char *s2); | |
| 132 | |
| 133 /** | |
| 134 Like fz_decode_uri_component but in-place. | |
| 135 */ | |
| 136 char *fz_urldecode(char *url); | |
| 137 | |
| 138 /** | |
| 139 * Return a new string representing the unencoded version of the given URI. | |
| 140 * This decodes all escape sequences except those that would result in one of the | |
| 141 * reserved characters that are part of the URI syntax (; / ? : @ & = + $ , #). | |
| 142 */ | |
| 143 char *fz_decode_uri(fz_context *ctx, const char *s); | |
| 144 | |
| 145 /** | |
| 146 * Return a new string representing the unencoded version of the given URI component. | |
| 147 * This decodes all escape sequences! | |
| 148 */ | |
| 149 char *fz_decode_uri_component(fz_context *ctx, const char *s); | |
| 150 | |
| 151 /** | |
| 152 * Return a new string representing the provided string encoded as a URI. | |
| 153 */ | |
| 154 char *fz_encode_uri(fz_context *ctx, const char *s); | |
| 155 | |
| 156 /** | |
| 157 * Return a new string representing the provided string encoded as a URI component. | |
| 158 * This also encodes the special reserved characters (; / ? : @ & = + $ , #). | |
| 159 */ | |
| 160 char *fz_encode_uri_component(fz_context *ctx, const char *s); | |
| 161 | |
| 162 /** | |
| 163 * Return a new string representing the provided string encoded as a URI path name. | |
| 164 * This also encodes the special reserved characters except /. | |
| 165 */ | |
| 166 char *fz_encode_uri_pathname(fz_context *ctx, const char *s); | |
| 167 | |
| 168 /** | |
| 169 create output file name using a template. | |
| 170 | |
| 171 If the path contains %[0-9]*d, the first such pattern will be | |
| 172 replaced with the page number. If the template does not contain | |
| 173 such a pattern, the page number will be inserted before the | |
| 174 filename extension. If the template does not have a filename | |
| 175 extension, the page number will be added to the end. | |
| 176 */ | |
| 177 void fz_format_output_path(fz_context *ctx, char *path, size_t size, const char *fmt, int page); | |
| 178 | |
| 179 /** | |
| 180 rewrite path to the shortest string that names the same path. | |
| 181 | |
| 182 Eliminates multiple and trailing slashes, interprets "." and | |
| 183 "..". Overwrites the string in place. | |
| 184 */ | |
| 185 char *fz_cleanname(char *name); | |
| 186 | |
| 187 /** | |
| 188 rewrite path to the shortest string that names the same path. | |
| 189 | |
| 190 Eliminates multiple and trailing slashes, interprets "." and | |
| 191 "..". Allocates a new string that the caller must free. | |
| 192 */ | |
| 193 char *fz_cleanname_strdup(fz_context *ctx, const char *name); | |
| 194 | |
| 195 /** | |
| 196 Resolve a path to an absolute file name. | |
| 197 The resolved path buffer must be of at least PATH_MAX size. | |
| 198 */ | |
| 199 char *fz_realpath(const char *path, char *resolved_path); | |
| 200 | |
| 201 /** | |
| 202 Case insensitive (UTF8) string comparison. | |
| 203 */ | |
| 204 int fz_strcasecmp(const char *a, const char *b); | |
| 205 | |
| 206 /** | |
| 207 Case insensitive (UTF8) string comparison. | |
| 208 | |
| 209 n = maximum number of bytes to read from either a or b. | |
| 210 */ | |
| 211 int fz_strncasecmp(const char *a, const char *b, size_t n); | |
| 212 | |
| 213 /** | |
| 214 FZ_UTFMAX: Maximum number of bytes in a decoded rune (maximum | |
| 215 length returned by fz_chartorune). | |
| 216 */ | |
| 217 enum { FZ_UTFMAX = 4 }; | |
| 218 | |
| 219 /** | |
| 220 UTF8 decode a single rune from a sequence of chars. | |
| 221 | |
| 222 rune: Pointer to an int to assign the decoded 'rune' to. | |
| 223 (0xFFFD on error). | |
| 224 | |
| 225 str: Pointer to a UTF8 encoded string. | |
| 226 | |
| 227 Returns the number of bytes consumed. | |
| 228 */ | |
| 229 int fz_chartorune(int *rune, const char *str); | |
| 230 | |
| 231 /** | |
| 232 UTF8 decode a single rune from a sequence of chars | |
| 233 of given length. | |
| 234 | |
| 235 rune: Pointer to an int to assign the decoded 'rune' to. | |
| 236 (0xFFFD on error). | |
| 237 | |
| 238 str: Pointer to a UTF8 encoded string. | |
| 239 | |
| 240 n: The number of bytes available at str. | |
| 241 | |
| 242 Returns the number of bytes consumed. | |
| 243 */ | |
| 244 int fz_chartorunen(int *rune, const char *str, size_t n); | |
| 245 | |
| 246 /** | |
| 247 UTF8 encode a rune to a sequence of chars. | |
| 248 | |
| 249 str: Pointer to a place to put the UTF8 encoded character. | |
| 250 | |
| 251 rune: The rune to encode. | |
| 252 | |
| 253 Returns the number of bytes the rune took to output. | |
| 254 */ | |
| 255 int fz_runetochar(char *str, int rune); | |
| 256 | |
| 257 /** | |
| 258 Count how many chars are required to represent a rune. | |
| 259 | |
| 260 rune: The rune to encode. | |
| 261 | |
| 262 Returns the number of bytes required to represent this rune in | |
| 263 UTF8. | |
| 264 */ | |
| 265 int fz_runelen(int rune); | |
| 266 | |
| 267 /** | |
| 268 Compute the index of a rune in a string. | |
| 269 | |
| 270 str: Pointer to beginning of a string. | |
| 271 | |
| 272 p: Pointer to a char in str. | |
| 273 | |
| 274 Returns the index of the rune pointed to by p in str. | |
| 275 */ | |
| 276 int fz_runeidx(const char *str, const char *p); | |
| 277 | |
| 278 /** | |
| 279 Obtain a pointer to the char representing the rune | |
| 280 at a given index. | |
| 281 | |
| 282 str: Pointer to beginning of a string. | |
| 283 | |
| 284 idx: Index of a rune to return a char pointer to. | |
| 285 | |
| 286 Returns a pointer to the char where the desired rune starts, | |
| 287 or NULL if the string ends before the index is reached. | |
| 288 */ | |
| 289 const char *fz_runeptr(const char *str, int idx); | |
| 290 | |
| 291 /** | |
| 292 Count how many runes the UTF-8 encoded string | |
| 293 consists of. | |
| 294 | |
| 295 s: The UTF-8 encoded, NUL-terminated text string. | |
| 296 | |
| 297 Returns the number of runes in the string. | |
| 298 */ | |
| 299 int fz_utflen(const char *s); | |
| 300 | |
| 301 /* | |
| 302 Convert a wchar string into a new heap allocated utf8 one. | |
| 303 */ | |
| 304 char *fz_utf8_from_wchar(fz_context *ctx, const wchar_t *s); | |
| 305 | |
| 306 /* | |
| 307 Convert a utf8 string into a new heap allocated wchar one. | |
| 308 */ | |
| 309 wchar_t *fz_wchar_from_utf8(fz_context *ctx, const char *path); | |
| 310 | |
| 311 | |
| 312 /** | |
| 313 Locale-independent decimal to binary conversion. On overflow | |
| 314 return (-)INFINITY and set errno to ERANGE. On underflow return | |
| 315 0 and set errno to ERANGE. Special inputs (case insensitive): | |
| 316 "NAN", "INF" or "INFINITY". | |
| 317 */ | |
| 318 float fz_strtof(const char *s, char **es); | |
| 319 | |
| 320 int fz_grisu(float f, char *s, int *exp); | |
| 321 | |
| 322 /** | |
| 323 Check and parse string into page ranges: | |
| 324 /,?(-?\d+|N)(-(-?\d+|N))?/ | |
| 325 */ | |
| 326 int fz_is_page_range(fz_context *ctx, const char *s); | |
| 327 const char *fz_parse_page_range(fz_context *ctx, const char *s, int *a, int *b, int n); | |
| 328 | |
| 329 /** | |
| 330 Unicode aware tolower and toupper functions. | |
| 331 */ | |
| 332 int fz_tolower(int c); | |
| 333 int fz_toupper(int c); | |
| 334 | |
| 335 #endif |
