comparison mupdf-source/include/mupdf/fitz/string-util.h @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 // Copyright (C) 2004-2025 Artifex Software, Inc.
2 //
3 // This file is part of MuPDF.
4 //
5 // MuPDF is free software: you can redistribute it and/or modify it under the
6 // terms of the GNU Affero General Public License as published by the Free
7 // Software Foundation, either version 3 of the License, or (at your option)
8 // any later version.
9 //
10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13 // details.
14 //
15 // You should have received a copy of the GNU Affero General Public License
16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17 //
18 // Alternative licensing terms are available from the licensor.
19 // For commercial licensing, see <https://www.artifex.com/> or contact
20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21 // CA 94129, USA, for further information.
22
23 #ifndef MUPDF_FITZ_STRING_H
24 #define MUPDF_FITZ_STRING_H
25
26 #include "mupdf/fitz/system.h"
27 #include "mupdf/fitz/context.h"
28
29 /* The Unicode character used to replace an incoming character whose value is
30 * unknown or unrepresentable. */
31 #define FZ_REPLACEMENT_CHARACTER 0xFFFD
32
33 /**
34 Safe string functions
35 */
36
37 /**
38 Return strlen(s), if that is less than maxlen, or maxlen if
39 there is no null byte ('\0') among the first maxlen bytes.
40 */
41 size_t fz_strnlen(const char *s, size_t maxlen);
42
43 /**
44 Given a pointer to a C string (or a pointer to NULL) break
45 it at the first occurrence of a delimiter char (from a given
46 set).
47
48 stringp: Pointer to a C string pointer (or NULL). Updated on
49 exit to point to the first char of the string after the
50 delimiter that was found. The string pointed to by stringp will
51 be corrupted by this call (as the found delimiter will be
52 overwritten by 0).
53
54 delim: A C string of acceptable delimiter characters.
55
56 Returns a pointer to a C string containing the chars of stringp
57 up to the first delimiter char (or the end of the string), or
58 NULL.
59 */
60 char *fz_strsep(char **stringp, const char *delim);
61
62 /**
63 Copy at most n-1 chars of a string into a destination
64 buffer with null termination, returning the real length of the
65 initial string (excluding terminator).
66
67 dst: Destination buffer, at least n bytes long.
68
69 src: C string (non-NULL).
70
71 n: Size of dst buffer in bytes.
72
73 Returns the length (excluding terminator) of src.
74 */
75 size_t fz_strlcpy(char *dst, const char *src, size_t n);
76
77 /**
78 Concatenate 2 strings, with a maximum length.
79
80 dst: pointer to first string in a buffer of n bytes.
81
82 src: pointer to string to concatenate.
83
84 n: Size (in bytes) of buffer that dst is in.
85
86 Returns the real length that a concatenated dst + src would have
87 been (not including terminator).
88 */
89 size_t fz_strlcat(char *dst, const char *src, size_t n);
90
91 /**
92 Safe strstr function.
93
94 haystack: Where to look (may be NULL).
95
96 needle: What to look for.
97
98 Returns NULL if unmatched, or pointer to start of match.
99 */
100 const char *fz_strstr(const char *haystack, const char *needle);
101
102 /**
103 Safe case-insensitive strstr function. (Accepts UTF-8).
104
105 haystack: Where to look (may be NULL).
106
107 needle: What to look for.
108
109 Returns NULL if unmatched, or pointer to start of match.
110 */
111 const char *fz_strstrcase(const char *haystack, const char *needle);
112
113 /**
114 Find the start of the first occurrence of the substring needle in haystack.
115 */
116 void *fz_memmem(const void *haystack, size_t haystacklen, const void *needle, size_t needlelen);
117
118 /**
119 extract the directory component from a path.
120 */
121 void fz_dirname(char *dir, const char *path, size_t dirsize);
122
123 /**
124 Find the filename component in a path.
125 */
126 const char *fz_basename(const char *path);
127
128 /**
129 portable strverscmp(3) function
130 */
131 int fz_strverscmp(const char *s1, const char *s2);
132
133 /**
134 Like fz_decode_uri_component but in-place.
135 */
136 char *fz_urldecode(char *url);
137
138 /**
139 * Return a new string representing the unencoded version of the given URI.
140 * This decodes all escape sequences except those that would result in one of the
141 * reserved characters that are part of the URI syntax (; / ? : @ & = + $ , #).
142 */
143 char *fz_decode_uri(fz_context *ctx, const char *s);
144
145 /**
146 * Return a new string representing the unencoded version of the given URI component.
147 * This decodes all escape sequences!
148 */
149 char *fz_decode_uri_component(fz_context *ctx, const char *s);
150
151 /**
152 * Return a new string representing the provided string encoded as a URI.
153 */
154 char *fz_encode_uri(fz_context *ctx, const char *s);
155
156 /**
157 * Return a new string representing the provided string encoded as a URI component.
158 * This also encodes the special reserved characters (; / ? : @ & = + $ , #).
159 */
160 char *fz_encode_uri_component(fz_context *ctx, const char *s);
161
162 /**
163 * Return a new string representing the provided string encoded as a URI path name.
164 * This also encodes the special reserved characters except /.
165 */
166 char *fz_encode_uri_pathname(fz_context *ctx, const char *s);
167
168 /**
169 create output file name using a template.
170
171 If the path contains %[0-9]*d, the first such pattern will be
172 replaced with the page number. If the template does not contain
173 such a pattern, the page number will be inserted before the
174 filename extension. If the template does not have a filename
175 extension, the page number will be added to the end.
176 */
177 void fz_format_output_path(fz_context *ctx, char *path, size_t size, const char *fmt, int page);
178
179 /**
180 rewrite path to the shortest string that names the same path.
181
182 Eliminates multiple and trailing slashes, interprets "." and
183 "..". Overwrites the string in place.
184 */
185 char *fz_cleanname(char *name);
186
187 /**
188 rewrite path to the shortest string that names the same path.
189
190 Eliminates multiple and trailing slashes, interprets "." and
191 "..". Allocates a new string that the caller must free.
192 */
193 char *fz_cleanname_strdup(fz_context *ctx, const char *name);
194
195 /**
196 Resolve a path to an absolute file name.
197 The resolved path buffer must be of at least PATH_MAX size.
198 */
199 char *fz_realpath(const char *path, char *resolved_path);
200
201 /**
202 Case insensitive (UTF8) string comparison.
203 */
204 int fz_strcasecmp(const char *a, const char *b);
205
206 /**
207 Case insensitive (UTF8) string comparison.
208
209 n = maximum number of bytes to read from either a or b.
210 */
211 int fz_strncasecmp(const char *a, const char *b, size_t n);
212
213 /**
214 FZ_UTFMAX: Maximum number of bytes in a UTF-8 encoded rune (maximum
215 number of bytes consumed by fz_chartorune).
216 */
217 enum { FZ_UTFMAX = 4 };
218
219 /**
220 UTF8 decode a single rune from a sequence of chars.
221
222 rune: Pointer to an int to assign the decoded 'rune' to.
223 (0xFFFD on error).
224
225 str: Pointer to a UTF8 encoded string.
226
227 Returns the number of bytes consumed.
228 */
229 int fz_chartorune(int *rune, const char *str);
230
231 /**
232 UTF8 decode a single rune from a sequence of chars
233 of given length.
234
235 rune: Pointer to an int to assign the decoded 'rune' to.
236 (0xFFFD on error).
237
238 str: Pointer to a UTF8 encoded string.
239
240 n: The number of bytes available at str.
241
242 Returns the number of bytes consumed.
243 */
244 int fz_chartorunen(int *rune, const char *str, size_t n);
245
246 /**
247 UTF8 encode a rune to a sequence of chars.
248
249 str: Pointer to a place to put the UTF8 encoded character.
250
251 rune: The rune to encode.
252
253 Returns the number of bytes the rune took to output.
254 */
255 int fz_runetochar(char *str, int rune);
256
257 /**
258 Count how many chars are required to represent a rune.
259
260 rune: The rune to encode.
261
262 Returns the number of bytes required to represent this rune in
263 UTF8.
264 */
265 int fz_runelen(int rune);
266
267 /**
268 Compute the index of a rune in a string.
269
270 str: Pointer to beginning of a string.
271
272 p: Pointer to a char in str.
273
274 Returns the index of the rune pointed to by p in str.
275 */
276 int fz_runeidx(const char *str, const char *p);
277
278 /**
279 Obtain a pointer to the char representing the rune
280 at a given index.
281
282 str: Pointer to beginning of a string.
283
284 idx: Index of a rune to return a char pointer to.
285
286 Returns a pointer to the char where the desired rune starts,
287 or NULL if the string ends before the index is reached.
288 */
289 const char *fz_runeptr(const char *str, int idx);
290
291 /**
292 Count how many runes the UTF-8 encoded string
293 consists of.
294
295 s: The UTF-8 encoded, NUL-terminated text string.
296
297 Returns the number of runes in the string.
298 */
299 int fz_utflen(const char *s);
300
301 /**
302 Convert a wchar string into a new heap allocated utf8 one.
303 */
304 char *fz_utf8_from_wchar(fz_context *ctx, const wchar_t *s);
305
306 /**
307 Convert a utf8 string into a new heap allocated wchar one.
308 */
309 wchar_t *fz_wchar_from_utf8(fz_context *ctx, const char *path);
310
311
312 /**
313 Locale-independent decimal to binary conversion. On overflow
314 return (-)INFINITY and set errno to ERANGE. On underflow return
315 0 and set errno to ERANGE. Special inputs (case insensitive):
316 "NAN", "INF" or "INFINITY".
317 */
318 float fz_strtof(const char *s, char **es);
319
320 int fz_grisu(float f, char *s, int *exp);
321
322 /**
323 Check and parse string into page ranges:
324 /,?(-?\d+|N)(-(-?\d+|N))?/
325 */
326 int fz_is_page_range(fz_context *ctx, const char *s);
327 const char *fz_parse_page_range(fz_context *ctx, const char *s, int *a, int *b, int n);
328
329 /**
330 Unicode aware tolower and toupper functions.
331 */
332 int fz_tolower(int c);
333 int fz_toupper(int c);
334
335 #endif