comparison cutils/util/walk.py @ 276:f7850ff5cbe0

treesum: when walking, if filenames contain CR and/or LF, pretend that these names are not properly encoded, so that the alt_xxx() path is used with proper additional backslash encoding. Also, the directory is marked as tainted.
author Franz Glasner <fzglas.hg@dom66.de>
date Fri, 21 Feb 2025 16:33:56 +0100
parents 224725fd9f2f
children f3e0b479928c
comparison
equal deleted inserted replaced
275:c72f5b2dbc6f 276:f7850ff5cbe0
186 return None 186 return None
187 return self._path 187 return self._path
188 188
189 @property 189 @property
190 def u8name(self): 190 def u8name(self):
191 """`.uname` as UTF-8 or `None` (as strict as `uname`)""" 191 """`.uname` as UTF-8 or `None` (as strict as `uname`).
192
193 Also do not allow CR or LF in the name.
194
195 """
192 n = self.uname 196 n = self.uname
193 return n if n is None else n.encode("utf-8", "strict") 197 if n is None:
198 return None
199 if (u'\n' in n) or (u'\r' in n):
200 return None
201 return n.encode("utf-8", "strict")
194 202
195 @property 203 @property
196 def u8path(self): 204 def u8path(self):
197 """`.upath` as UTF-8 or `None` (as strict as `upath`""" 205 """`.upath` as UTF-8 or `None` (as strict as `upath`).
206
207 Also do not allow CR or LF in the path.
208
209 """
198 p = self.upath 210 p = self.upath
199 return p if p is None else p.encode("utf-8", "strict") 211 if p is None:
212 return None
213 if (u'\n' in p) or (u'\r' in p):
214 return None
215 return p.encode("utf-8", "strict")
200 216
201 @property 217 @property
202 def alt_u8name(self): 218 def alt_u8name(self):
203 if self._alt_u8name is _notset: 219 if self._alt_u8name is _notset:
204 self._alt_u8name = WalkDirEntry.alt_u8(self._name) 220 self._alt_u8name = WalkDirEntry.alt_u8(self._name)
211 @staticmethod 227 @staticmethod
212 def alt_u8(what): 228 def alt_u8(what):
213 if PY2: 229 if PY2:
214 if isinstance(what, bytes): 230 if isinstance(what, bytes):
215 try: 231 try:
216 return (what.decode(_FSENCODING, "strict") 232 s = (what.decode(_FSENCODING, "strict")
217 .encode("utf-8", "strict")) 233 .encode("utf-8", "strict"))
218 except UnicodeError: 234 except UnicodeError:
219 return (WalkDirEntry.surrogate_decode(what) 235 s = (WalkDirEntry.surrogate_decode(what)
220 .encode("ascii", "backslashreplace")) 236 .encode("ascii", "backslashreplace"))
221 else: 237 else:
222 return what.encode("ascii", "backslashreplace") 238 s = what.encode("ascii", "backslashreplace")
223 else: 239 else:
224 return what.encode("utf-8", "backslashreplace") 240 s = what.encode("utf-8", "backslashreplace")
241 return s.replace(b'\n', b"\\x0a").replace(b'\r', b"\\x0d")
225 242
226 @property 243 @property
227 def is_symlink(self): 244 def is_symlink(self):
228 return self._is_symlink 245 return self._is_symlink
229 246