diff cutils/util/walk.py @ 372:bfe1160fbfd3

treesum: Make ERROR outputs more consistent: use native paths where possible
author Franz Glasner <fzglas.hg@dom66.de>
date Sun, 13 Apr 2025 14:15:33 +0200
parents 29a301ff2501
children
line wrap: on
line diff
--- a/cutils/util/walk.py	Sat Apr 12 09:05:48 2025 +0200
+++ b/cutils/util/walk.py	Sun Apr 13 14:15:33 2025 +0200
@@ -25,7 +25,7 @@
 import stat
 import sys
 
-from . import PY2
+from . import PY2, escape_for_output
 
 
 HELP_FILETYPE_INDICATORS = r"""
@@ -124,7 +124,8 @@
 
     """
 
-    __slots__ = ("_name", "_path",     # encoded as given in the ctor
+    __slots__ = ("_name", "_npath",  # encoded as given in the ctor
+                 "_path",            # encoded as given but with slashes
                  "_is_symlink", "_is_reg", "_is_dir", "_stat_result",
                  "_stat_errno", "_stat_errstr",
                  "_alt_fsname", "_alt_u8name")
@@ -132,6 +133,8 @@
     def __init__(self, name, path):
         self._name = name    # the name as given in the constructor
         """The name exactly as given in the ctor"""
+        self._npath = path
+        """The path exactly as given in the ctor"""
         self._path = _unix_path(path)
         """The path as given in the ctor -- but normalized to have slashes"""
         self._is_symlink = self._is_reg = self._is_dir = self._stat_result = \
@@ -144,6 +147,11 @@
         return self._name
 
     @property
+    def npath(self):
+        """The native path: exactly as given in the ctor, not normalized"""
+        return self._npath
+
+    @property
     def path(self):
         """The original path exactly as given in the ctor -- but normalized to
         have forward slashes"""
@@ -172,6 +180,28 @@
         return s
 
     @property
+    def fsnpath(self):
+        """Always bytes: the native path in the filesystem encoding.
+
+        `None` if encoding fails or it has a TAB, CR, LF or backslash.
+
+        :rtype: bytes or None
+
+        """
+        if PY2 and isinstance(self._npath, bytes):
+            p = self._npath  # already bytes: must not be encoded again
+        elif PY2:
+            try:
+                p = self._npath.encode(_FSENCODING, "strict")
+            except UnicodeError:
+                return None
+        else:
+            p = os.fsencode(self._npath)
+        if (b'\n' in p) or (b'\r' in p) or (b'\t' in p) or (b'\\' in p):
+            return None
+        return p
+
+    @property
     def fspath(self):
         """Always bytes.
 
@@ -205,6 +235,15 @@
         return self._alt_fsname
 
     @property
+    def alt_fsnpath(self):
+        """Alternative and "escaped" native filesystem path -- always bytes.
+
+        :rtype: bytes
+
+        """
+        return WalkDirEntry.alt_fs(self._npath)
+
+    @property
     def alt_fspath(self):
         """Alternative and "escaped" filesystem path -- always bytes.
 
@@ -220,16 +259,7 @@
         # ... and hope that the current FS encoding is compatible
         #     with it
         #
-        if isinstance(what, bytes):
-            s = (what.replace(b'\\', b"\\\\")
-                 .replace(b'\n', b"\\x0a")
-                 .replace(b'\r', b"\\x0d")
-                 .replace(b'\t', b"\\x09"))
-        else:
-            s = (what.replace(u'\\', u"\\\\")
-                 .replace(u'\n', u"\\x0a")
-                 .replace(u'\r', u"\\x0d")
-                 .replace(u'\t', u"\\x09"))
+        s = escape_for_output(what)
         if PY2:
             if isinstance(s, bytes):
                 return s
@@ -262,6 +292,29 @@
             return self._name
 
     @property
+    def unpath(self):
+        """The native path as "real", strictly encoded Unicode text.
+
+        Yields `None` if such a strict conversion is not possible.
+
+        :rtype: text or None
+
+        """
+        native = self._npath
+        if not PY2:
+            try:
+                native.encode("utf-8", "strict")  # probe only
+            except UnicodeError:
+                return None
+            return native
+        if not isinstance(native, bytes):
+            return native
+        try:
+            return native.decode(_FSENCODING, "strict")
+        except UnicodeError:
+            return None
+
+    @property
     def upath(self):
         """Always "real", strictly encoded Unicode or `None` if this is not
         possible.
@@ -299,6 +352,20 @@
         return n.encode("utf-8", "strict")
 
     @property
+    def u8npath(self):
+        """`.unpath` as UTF-8 or `None` (as strict as `unpath`).
+
+        Also do not allow TAB, CR, LF or backslash in the path.
+
+        """
+        p = self.unpath
+        if p is None:
+            return None
+        if (u'\n' in p) or (u'\r' in p) or (u'\t' in p) or (u'\\' in p):
+            return None
+        return p.encode("utf-8", "strict")
+
+    @property
     def u8path(self):
         """`.upath` as UTF-8 or `None` (as strict as `upath`.
 
@@ -319,6 +386,10 @@
         return self._alt_u8name
 
     @property
+    def alt_u8npath(self):  # the native path run through `alt_u8`
+        return WalkDirEntry.alt_u8(self._npath)
+
+    @property
     def alt_u8path(self):
         return WalkDirEntry.alt_u8(self._path)
 
@@ -329,16 +400,7 @@
         # ... and hope that the current UTF-8 is compatible
         #     with it
         #
-        if isinstance(what, bytes):
-            s = (what.replace(b'\\', b"\\\\")
-                 .replace(b'\n', b"\\x0a")
-                 .replace(b'\r', b"\\x0d")
-                 .replace(b'\t', b"\\x09"))
-        else:
-            s = (what.replace(u'\\', u"\\\\")
-                 .replace(u'\n', u"\\x0a")
-                 .replace(u'\r', u"\\x0d")
-                 .replace(u'\t', u"\\x09"))
+        s = escape_for_output(what)
         if PY2:
             if isinstance(s, bytes):
                 try:
@@ -352,6 +414,22 @@
         else:
             return s.encode("utf-8", "backslashreplace")
 
+    @staticmethod
+    def alt_bytes(what, use_utf8):
+        """Escape `what` via `alt_u8` or `alt_fs`; falsy input is returned
+        unchanged."""
+        if not what:
+            return what
+        convert = WalkDirEntry.alt_u8 if use_utf8 else WalkDirEntry.alt_fs
+        return convert(what)
+
+    @staticmethod
+    def alt_text(what, use_utf8):
+        """Like `alt_bytes` but decoded to text on Python 3."""
+        b = WalkDirEntry.alt_bytes(what, use_utf8)
+        # falsy `what` comes back unescaped and need not be bytes
+        return b if PY2 or not isinstance(b, bytes) else b.decode("iso-8859-1")
+
     @property
     def is_symlink(self):
         return self._is_symlink