changeset 276:f7850ff5cbe0

treesum: when walking, if filenames contain CR and/or LF, pretend that these names are not properly encoded, so that the alt_xxx() path is used with a proper additional backslash encoding. The directory is also marked as tainted.
author Franz Glasner <fzglas.hg@dom66.de>
date Fri, 21 Feb 2025 16:33:56 +0100
parents c72f5b2dbc6f
children 9676ecd32a07
files cutils/util/walk.py
diffstat 1 files changed, 27 insertions(+), 10 deletions(-) [+]
line wrap: on
line diff
--- a/cutils/util/walk.py	Thu Feb 20 15:30:51 2025 +0100
+++ b/cutils/util/walk.py	Fri Feb 21 16:33:56 2025 +0100
@@ -188,15 +188,31 @@
 
     @property
     def u8name(self):
-        """`.uname` as UTF-8 or `None` (as strict as `uname`)"""
+        """`.uname` as UTF-8 or `None` (as strict as `uname`).
+
+        Also do not allow CR or LF in the name.
+
+        """
         n = self.uname
-        return n if n is None else n.encode("utf-8", "strict")
+        if n is None:
+            return None
+        if (u'\n' in n) or (u'\r' in n):
+            return None
+        return n.encode("utf-8", "strict")
 
     @property
     def u8path(self):
-        """`.upath` as UTF-8 or `None` (as strict as `upath`"""
+        """`.upath` as UTF-8 or `None` (as strict as `upath`).
+
+        Also do not allow CR or LF in the path.
+
+        """
         p = self.upath
-        return p if p is None else p.encode("utf-8", "strict")
+        if p is None:
+            return None
+        if (u'\n' in p) or (u'\r' in p):
+            return None
+        return p.encode("utf-8", "strict")
 
     @property
     def alt_u8name(self):
@@ -213,15 +229,16 @@
         if PY2:
             if isinstance(what, bytes):
                 try:
-                    return (what.decode(_FSENCODING, "strict")
-                            .encode("utf-8", "strict"))
+                    s = (what.decode(_FSENCODING, "strict")
+                         .encode("utf-8", "strict"))
                 except UnicodeError:
-                    return (WalkDirEntry.surrogate_decode(what)
-                            .encode("ascii", "backslashreplace"))
+                    s = (WalkDirEntry.surrogate_decode(what)
+                         .encode("ascii", "backslashreplace"))
             else:
-                return what.encode("ascii", "backslashreplace")
+                s =  what.encode("ascii", "backslashreplace")
         else:
-            return what.encode("utf-8", "backslashreplace")
+            s = what.encode("utf-8", "backslashreplace")
+        return s.replace(b'\n', b"\\x0a").replace(b'\r', b"\\x0d")
 
     @property
     def is_symlink(self):