diff mupdf-source/include/mupdf/fitz/string-util.h @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mupdf-source/include/mupdf/fitz/string-util.h	Mon Sep 15 11:43:07 2025 +0200
@@ -0,0 +1,335 @@
+// Copyright (C) 2004-2025 Artifex Software, Inc.
+//
+// This file is part of MuPDF.
+//
+// MuPDF is free software: you can redistribute it and/or modify it under the
+// terms of the GNU Affero General Public License as published by the Free
+// Software Foundation, either version 3 of the License, or (at your option)
+// any later version.
+//
+// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
+// details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
+//
+// Alternative licensing terms are available from the licensor.
+// For commercial licensing, see <https://www.artifex.com/> or contact
+// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
+// CA 94129, USA, for further information.
+
+#ifndef MUPDF_FITZ_STRING_H
+#define MUPDF_FITZ_STRING_H
+
+#include "mupdf/fitz/system.h"
+#include "mupdf/fitz/context.h"
+
+/* The Unicode character used to replace an incoming character whose
+ * value is unknown or unrepresentable. */
+#define FZ_REPLACEMENT_CHARACTER 0xFFFD
+
+/**
+	Safe string functions
+*/
+
+/**
+	Return strlen(s), if that is less than maxlen, or maxlen if
+	there is no null byte ('\0') among the first maxlen bytes.
+*/
+size_t fz_strnlen(const char *s, size_t maxlen);
+
+/**
+	Given a pointer to a C string (or a pointer to NULL) break
+	it at the first occurrence of a delimiter char (from a given
+	set).
+
+	stringp: Pointer to a C string pointer (or NULL). Updated on
+	exit to point to the first char of the string after the
+	delimiter that was found. The string pointed to by stringp will
+	be corrupted by this call (as the found delimiter will be
+	overwritten by 0).
+
+	delim: A C string of acceptable delimiter characters.
+
+	Returns a pointer to a C string containing the chars of stringp
+	up to the first delimiter char (or the end of the string), or
+	NULL.
+*/
+char *fz_strsep(char **stringp, const char *delim);
+
+/**
+	Copy at most n-1 chars of a string into a destination
+	buffer with null termination, returning the real length of the
+	initial string (excluding terminator).
+
+	dst: Destination buffer, at least n bytes long.
+
+	src: C string (non-NULL).
+
+	n: Size of dst buffer in bytes.
+
+	Returns the length (excluding terminator) of src.
+*/
+size_t fz_strlcpy(char *dst, const char *src, size_t n);
+
+/**
+	Concatenate 2 strings, with a maximum length.
+
+	dst: pointer to first string in a buffer of n bytes.
+
+	src: pointer to string to concatenate.
+
+	n: Size (in bytes) of buffer that dst is in.
+
+	Returns the real length that a concatenated dst + src would have
+	been (not including terminator).
+*/
+size_t fz_strlcat(char *dst, const char *src, size_t n);
+
+/**
+	Safe strstr function.
+
+	haystack: Where to look (may be NULL).
+
+	needle: What to look for.
+
+	Returns NULL if unmatched, or pointer to start of match.
+*/
+const char *fz_strstr(const char *haystack, const char *needle);
+
+/**
+	Safe case-insensitive strstr function. (Accepts UTF-8).
+
+	haystack: Where to look (may be NULL).
+
+	needle: What to look for.
+
+	Returns NULL if unmatched, or pointer to start of match.
+*/
+const char *fz_strstrcase(const char *haystack, const char *needle);
+
+/**
+	Find the start of the first occurrence of the substring needle in haystack.
+*/
+void *fz_memmem(const void *haystack, size_t haystacklen, const void *needle, size_t needlelen);
+
+/**
+	extract the directory component from a path.
+*/
+void fz_dirname(char *dir, const char *path, size_t dirsize);
+
+/**
+	Find the filename component in a path.
+*/
+const char *fz_basename(const char *path);
+
+/**
+	portable strverscmp(3) function
+*/
+int fz_strverscmp(const char *s1, const char *s2);
+
+/**
+	Like fz_decode_uri_component but in-place.
+*/
+char *fz_urldecode(char *url);
+
+/**
+ * Return a new string representing the unencoded version of the given URI.
+ * This decodes all escape sequences except those that would result in one of
+ * the reserved characters that are part of the URI syntax (; / ? : @ & = + $ , #).
+ */
+char *fz_decode_uri(fz_context *ctx, const char *s);
+
+/**
+ * Return a new string representing the unencoded version of the given URI component.
+ * This decodes all escape sequences!
+ */
+char *fz_decode_uri_component(fz_context *ctx, const char *s);
+
+/**
+ * Return a new string representing the provided string encoded as a URI.
+ */
+char *fz_encode_uri(fz_context *ctx, const char *s);
+
+/**
+ * Return a new string representing the provided string encoded as a URI component.
+ * This also encodes the special reserved characters (; / ? : @ & = + $ , #).
+ */
+char *fz_encode_uri_component(fz_context *ctx, const char *s);
+
+/**
+ * Return a new string representing the provided string encoded as a URI path name.
+ * This also encodes the special reserved characters except /.
+ */
+char *fz_encode_uri_pathname(fz_context *ctx, const char *s);
+
+/**
+	create output file name using a template.
+
+	If the path contains %[0-9]*d, the first such pattern will be
+	replaced with the page number. If the template does not contain
+	such a pattern, the page number will be inserted before the
+	filename extension. If the template does not have a filename
+	extension, the page number will be added to the end.
+*/
+void fz_format_output_path(fz_context *ctx, char *path, size_t size, const char *fmt, int page);
+
+/**
+	rewrite path to the shortest string that names the same path.
+
+	Eliminates multiple and trailing slashes, interprets "." and
+	"..". Overwrites the string in place.
+*/
+char *fz_cleanname(char *name);
+
+/**
+	rewrite path to the shortest string that names the same path.
+
+	Eliminates multiple and trailing slashes, interprets "." and
+	"..". Allocates a new string that the caller must free.
+*/
+char *fz_cleanname_strdup(fz_context *ctx, const char *name);
+
+/**
+	Resolve a path to an absolute file name.
+	The resolved path buffer must be of at least PATH_MAX size.
+*/
+char *fz_realpath(const char *path, char *resolved_path);
+
+/**
+	Case insensitive (UTF8) string comparison.
+*/
+int fz_strcasecmp(const char *a, const char *b);
+
+/**
+	Case insensitive (UTF8) string comparison.
+
+	n = maximum number of bytes to read from either a or b.
+*/
+int fz_strncasecmp(const char *a, const char *b, size_t n);
+
+/**
+	FZ_UTFMAX: Maximum number of bytes in the UTF-8 encoding of a
+	single rune (maximum length returned by fz_chartorune).
+*/
+enum { FZ_UTFMAX = 4 };
+
+/**
+	UTF8 decode a single rune from a sequence of chars.
+
+	rune: Pointer to an int to assign the decoded 'rune' to.
+	(0xFFFD on error).
+
+	str: Pointer to a UTF8 encoded string.
+
+	Returns the number of bytes consumed.
+*/
+int fz_chartorune(int *rune, const char *str);
+
+/**
+	UTF8 decode a single rune from a sequence of chars
+	of given length.
+
+	rune: Pointer to an int to assign the decoded 'rune' to.
+	(0xFFFD on error).
+
+	str: Pointer to a UTF8 encoded string.
+
+	n: The number of bytes available at str.
+
+	Returns the number of bytes consumed.
+*/
+int fz_chartorunen(int *rune, const char *str, size_t n);
+
+/**
+	UTF8 encode a rune to a sequence of chars.
+
+	str: Pointer to a place to put the UTF8 encoded character.
+
+	rune: The rune to encode (passed by value).
+
+	Returns the number of bytes the rune took to output.
+*/
+int fz_runetochar(char *str, int rune);
+
+/**
+	Count how many chars are required to represent a rune.
+
+	rune: The rune to encode.
+
+	Returns the number of bytes required to represent this rune in
+	UTF8.
+*/
+int fz_runelen(int rune);
+
+/**
+	Compute the index of a rune in a string.
+
+	str: Pointer to beginning of a string.
+
+	p: Pointer to a char in str.
+
+	Returns the index of the rune pointed to by p in str.
+*/
+int fz_runeidx(const char *str, const char *p);
+
+/**
+	Obtain a pointer to the char representing the rune
+	at a given index.
+
+	str: Pointer to beginning of a string.
+
+	idx: Index of a rune to return a char pointer to.
+
+	Returns a pointer to the char where the desired rune starts,
+	or NULL if the string ends before the index is reached.
+*/
+const char *fz_runeptr(const char *str, int idx);
+
+/**
+	Count how many runes the UTF-8 encoded string
+	consists of.
+
+	s: The UTF-8 encoded, NUL-terminated text string.
+
+	Returns the number of runes in the string.
+*/
+int fz_utflen(const char *s);
+
+/**
+	Convert a wchar_t string into a new heap-allocated UTF-8 string.
+*/
+char *fz_utf8_from_wchar(fz_context *ctx, const wchar_t *s);
+
+/**
+	Convert a UTF-8 string into a new heap-allocated wchar_t string.
+*/
+wchar_t *fz_wchar_from_utf8(fz_context *ctx, const char *path);
+
+
+/**
+	Locale-independent decimal to binary conversion. On overflow
+	return (-)INFINITY and set errno to ERANGE. On underflow return
+	0 and set errno to ERANGE. Special inputs (case insensitive):
+	"NAN", "INF" or "INFINITY".
+*/
+float fz_strtof(const char *s, char **es);
+
+/**
+	NOTE(review): this function is undocumented in this header.
+	Presumably an implementation of the Grisu float-to-decimal
+	algorithm that writes the digits of f to s and the decimal
+	exponent to *exp; the meaning of the return value is not
+	visible here -- confirm against the implementation.
+*/
+int fz_grisu(float f, char *s, int *exp);
+
+/**
+	Check and parse string into page ranges:
+		/,?(-?\d+|N)(-(-?\d+|N))?/
+*/
+int fz_is_page_range(fz_context *ctx, const char *s);
+const char *fz_parse_page_range(fz_context *ctx, const char *s, int *a, int *b, int n);
+
+/**
+	Unicode aware tolower and toupper functions.
+*/
+int fz_tolower(int c);
+int fz_toupper(int c);
+
+#endif