Mercurial > hgrepos > Python > apps > py-cutils
diff cutils/util/walk.py @ 201:58d93453c307
Many more encoding-related methods for WalkDirEntry and some unit tests
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Tue, 21 Jan 2025 14:30:06 +0100 |
| parents | b2aba84ca426 |
| children | 3a85f7bbe0b1 |
line wrap: on
line diff
--- a/cutils/util/walk.py Fri Jan 17 20:12:58 2025 +0100 +++ b/cutils/util/walk.py Tue Jan 21 14:30:06 2025 +0100 @@ -10,7 +10,7 @@ from __future__ import print_function, absolute_import -__all__ = ["ScanDir", "getfsencoding"] +__all__ = ["WalkDirEntry", "ScanDir", "getfsencoding"] import os @@ -26,9 +26,25 @@ from . import PY2 +_notset = object() + + _FSENCODING = sys.getfilesystemencoding() +if PY2: + + def _unix_path(s): + if isinstance(s, bytes): + return s.replace(b"\\", b"/") + return s.replace(u"\\", u"/") + +else: + + def _unix_path(s): + return s.replace("\\", "/") + + class WalkDirEntry(object): """A :class:`os.DirEntry` alike to be used in :func:`walk` and for @@ -36,47 +52,188 @@ """ - __slots__ = ("_name", "_fsname", "_path", "_fspath", "_is_symlink", - "_is_dir", "_stat_result") + __slots__ = ("_name", "_path", # encoded as given in the ctor + "_is_symlink", "_is_dir", "_stat_result", + "_alt_fsname", "_alt_u8name") - def __init__(self, name): - self._name = name - if PY2: - assert isinstance(name, bytes) - self._fsname = name - else: - self._name = name - self._fsname = os.fsencode(name) - self._path = None - self._fspath = None + def __init__(self, name, path): + self._name = name # the name as given in the constructor + """The name exactly as given in the ctor""" + self._path = _unix_path(path) + """The path as given in the ctor -- but normalized to have slashes""" self._is_symlink = self._is_dir = self._stat_result = None + self._alt_fsname = self._alt_u8name = _notset @property def name(self): - """The native name""" + """The original name exactly as given in the ctor""" return self._name @property - def fsname(self): - """The name as bytes""" - return self._fsname - - @property def path(self): - """Always native""" + """The original path exactly as given in the ctor.""" return self._path @property + def fsname(self): + """The name as bytes for the filesystem. 
+ + :rtype: bytes or None + + """ + if PY2: + if isinstance(self._name, bytes): + return self._name + try: + return self._name.encode(_FSENCODING, "strict") + except UnicodeError: + return None + else: + return os.fsencode(self._name) + + @property + def alt_fsname(self): + """Alternative and "escaped" filesystem name -- always bytes. + + :rtype: bytes + + """ + if self._alt_fsname is _notset: + if PY2: + if isinstance(self._name, bytes): + self._alt_fsname = self._name + else: + self._alt_fsname = self._name.encode( + _FSENCODING, "backslashreplace") + else: + self._alt_fsname = os.fsencode(self._name) + return self._alt_fsname + + @property def fspath(self): - """Always bytes""" - if self._path is not None: - if self._fspath is None: - if PY2: - assert isinstance(self._path, bytes) - self._fspath = self._path + """Always bytes. + + :rtype: bytes or None + + """ + if PY2: + if isinstance(self._path, bytes): + return self._path + try: + return self._path.encode(_FSENCODING, "strict") + except UnicodeError: + return None + else: + return os.fsencode(self._path) + + @property + def alt_fspath(self): + """Alternative and "escaped" filesystem path -- always bytes. + + :rtype: bytes + + """ + if PY2: + if isinstance(self._path, bytes): + return self._path + return self._path.encode(_FSENCODING, "backslashreplace") + else: + return os.fsencode(self._path) + + @property + def uname(self): + """Always "real", strictly encoded Unicode or `None` if this is not + possible. + + :rtype: text or None + + """ + if PY2: + if isinstance(self._name, bytes): + try: + return self._name.decode(_FSENCODING, "strict") + except UnicodeError: + return None + else: + return self._name + else: + try: + self._name.encode("utf-8", "strict") + except UnicodeError: + return None + return self._name + + @property + def upath(self): + """Always "real", strictly encoded Unicode or `None` if this is not + possible. 
+ + :rtype: text or None + + """ + if PY2: + if isinstance(self._path, bytes): + try: + return self._path.decode(_FSENCODING, "strict") + except UnicodeError: + return None + else: + return self._path + else: + try: + self._path.encode("utf-8", "strict") + except UnicodeError: + return None + return self._path + + @property + def u8name(self): + """`.uname` as UTF-8 or `None` (as strict as `uname`)""" + n = self.uname + return n if n is None else n.encode("utf-8", "strict") + + @property + def u8path(self): + """`.upath` as UTF-8 or `None` (as strict as `upath`)""" + p = self.upath + return p if p is None else p.encode("utf-8", "strict") + + @property + def alt_u8name(self): + if self._alt_u8name is _notset: + if PY2: + if isinstance(self._name, bytes): + try: + self._alt_u8name = ( + self._name + .decode(_FSENCODING, "strict") + .encode("utf-8", "strict")) + except UnicodeError: + self._alt_u8name = ( + self.surrogate_decode(self._name) + .encode("ascii", "backslashreplace")) else: - self._fspath = os.fsencode(self._path) - return self._fspath + self._alt_u8name = self._name.encode( + "ascii", "backslashreplace") + else: + self._alt_u8name = self._name.encode( + "utf-8", "backslashreplace") + return self._alt_u8name + + @property + def alt_u8path(self): + if PY2: + if isinstance(self._path, bytes): + try: + return (self._path.decode(_FSENCODING, "strict") + .encode("utf-8", "strict")) + except UnicodeError: + return (self.surrogate_decode(self._path) + .encode("ascii", "backslashreplace")) + else: + return self._path.encode("ascii", "backslashreplace") + else: + return self._path.encode("utf-8", "backslashreplace") @property def is_symlink(self): @@ -102,8 +259,7 @@ @classmethod def from_direntry(cls_, entry): - w = cls_(entry.name) - w._path = entry.path + w = cls_(entry.name, entry.path) try: w._is_dir = entry.is_dir(follow_symlinks=True) except OSError: @@ -125,9 +281,9 @@ return w @classmethod - def from_path_name(cls_, path, name): - w = cls_(name) - w._path = 
os.path.join(path, name) + def from_path_name(cls_, path, name, _do_stat=True): + """`_do_stat` is to be used only for testing purposes""" + w = cls_(name, os.path.join(path, name)) try: w._is_dir = os.path.isdir(w._path) except OSError: @@ -144,12 +300,40 @@ # is not a symbolic link, same behaviour than os.path.islink(). # w._is_symlink = False - w._stat_result = os.stat(w._path) + if _do_stat: + w._stat_result = os.stat(w._path) + return w + + @classmethod + def from_readlink(cls_, path): + w = cls_(os.path.basename(path), path) return w @staticmethod def sort_key(entry): - return entry._fsname + return entry.alt_fsname # because it should never throw + + @staticmethod + def alt_sort_key(entry): + return entry.alt_u8name # because it should never throw + + if PY2: + + @staticmethod + def surrogate_decode(what): + """Decode the bytes object `what` using surrogates from :pep:`383` + for all non-ASCII octets. + + """ + uwhat = [] + assert isinstance(what, bytes) + for ch in what: + chcode = ord(ch) + if chcode <= 0x7f: + uwhat.append(unichr(chcode)) # noqa: F821 unichr + else: + uwhat.append(unichr(0xDC00 + chcode)) # noqa: F821 unichr + return u"".join(uwhat) if scandir:
