comparison cutils/util/walk.py @ 372:bfe1160fbfd3

treesum: Make ERROR outputs more consistent: use native paths where possible
author Franz Glasner <fzglas.hg@dom66.de>
date Sun, 13 Apr 2025 14:15:33 +0200
parents 29a301ff2501
children
comparison
equal deleted inserted replaced
371:29a301ff2501 372:bfe1160fbfd3
23 except ImportError: 23 except ImportError:
24 scandir = None 24 scandir = None
25 import stat 25 import stat
26 import sys 26 import sys
27 27
28 from . import PY2 28 from . import PY2, escape_for_output
29 29
30 30
31 HELP_FILETYPE_INDICATORS = r""" 31 HELP_FILETYPE_INDICATORS = r"""
32 FILETYPE INDICATORS 32 FILETYPE INDICATORS
33 =================== 33 ===================
122 """A :class:`os.DirEntry` alike to be used in :func:`walk` and for 122 """A :class:`os.DirEntry` alike to be used in :func:`walk` and for
123 its results. 123 its results.
124 124
125 """ 125 """
126 126
127 __slots__ = ("_name", "_path", # encoded as given in the ctor 127 __slots__ = ("_name", "_npath", # encoded as given in the ctor
128 "_path", # encoded as given but with slashes
128 "_is_symlink", "_is_reg", "_is_dir", "_stat_result", 129 "_is_symlink", "_is_reg", "_is_dir", "_stat_result",
129 "_stat_errno", "_stat_errstr", 130 "_stat_errno", "_stat_errstr",
130 "_alt_fsname", "_alt_u8name") 131 "_alt_fsname", "_alt_u8name")
131 132
132 def __init__(self, name, path): 133 def __init__(self, name, path):
133 self._name = name # the name as given in the constructor 134 self._name = name # the name as given in the constructor
134 """The name exactly as given in the ctor""" 135 """The name exactly as given in the ctor"""
136 self._npath = path
137 """The path exactly as given in the ctor"""
135 self._path = _unix_path(path) 138 self._path = _unix_path(path)
136 """The path as given in the ctor -- but normalized to have slashes""" 139 """The path as given in the ctor -- but normalized to have slashes"""
137 self._is_symlink = self._is_reg = self._is_dir = self._stat_result = \ 140 self._is_symlink = self._is_reg = self._is_dir = self._stat_result = \
138 self._stat_errno = self._stat_errstr = None 141 self._stat_errno = self._stat_errstr = None
139 self._alt_fsname = self._alt_u8name = _notset 142 self._alt_fsname = self._alt_u8name = _notset
140 143
141 @property 144 @property
142 def name(self): 145 def name(self):
143 """The original name exactly as given in the ctor""" 146 """The original name exactly as given in the ctor"""
144 return self._name 147 return self._name
148
149 @property
150 def npath(self):
151 """The original path exactly as given in the ctor"""
152 return self._npath
145 153
146 @property 154 @property
147 def path(self): 155 def path(self):
148 """The original path exactly as given in the ctor -- but normalized to 156 """The original path exactly as given in the ctor -- but normalized to
149 have forward slashes""" 157 have forward slashes"""
170 if (b'\n' in s) or (b'\r' in s) or (b'\t' in s) or (b'\\' in s): 178 if (b'\n' in s) or (b'\r' in s) or (b'\t' in s) or (b'\\' in s):
171 return None 179 return None
172 return s 180 return s
173 181
174 @property 182 @property
183 def fsnpath(self):
184 """Always bytes.
185
186 Also do not allow TAB, CR, LF or backslash in the path.
187
188 :rtype: bytes or None
189
190 """
191 if PY2:
192 if isinstance(self._npath, bytes):
193 p = self._npath
194 try:
195 p = self._npath.encode(_FSENCODING, "strict")
196 except UnicodeError:
197 return None
198 else:
199 p = os.fsencode(self._npath)
200 if (b'\n' in p) or (b'\r' in p) or (b'\t' in p) or (b'\\' in p):
201 return None
202 return p
203
204 @property
175 def fspath(self): 205 def fspath(self):
176 """Always bytes. 206 """Always bytes.
177 207
178 Also do not allow TAB, CR or LF in the path. 208 Also do not allow TAB, CR or LF in the path.
179 209
201 231
202 """ 232 """
203 if self._alt_fsname is _notset: 233 if self._alt_fsname is _notset:
204 self._alt_fsname = WalkDirEntry.alt_fs(self._name) 234 self._alt_fsname = WalkDirEntry.alt_fs(self._name)
205 return self._alt_fsname 235 return self._alt_fsname
236
237 @property
238 def alt_fsnpath(self):
239 """Alternative and "escaped" filesystem path -- always bytes.
240
241 :rtype: bytes
242
243 """
244 return WalkDirEntry.alt_fs(self._npath)
206 245
207 @property 246 @property
208 def alt_fspath(self): 247 def alt_fspath(self):
209 """Alternative and "escaped" filesystem path -- always bytes. 248 """Alternative and "escaped" filesystem path -- always bytes.
210 249
218 # 257 #
219 # Prevent double encoding ... 258 # Prevent double encoding ...
220 # ... and hope that the current FS encoding is compatible 259 # ... and hope that the current FS encoding is compatible
221 # with it 260 # with it
222 # 261 #
223 if isinstance(what, bytes): 262 s = escape_for_output(what)
224 s = (what.replace(b'\\', b"\\\\")
225 .replace(b'\n', b"\\x0a")
226 .replace(b'\r', b"\\x0d")
227 .replace(b'\t', b"\\x09"))
228 else:
229 s = (what.replace(u'\\', u"\\\\")
230 .replace(u'\n', u"\\x0a")
231 .replace(u'\r', u"\\x0d")
232 .replace(u'\t', u"\\x09"))
233 if PY2: 263 if PY2:
234 if isinstance(s, bytes): 264 if isinstance(s, bytes):
235 return s 265 return s
236 else: 266 else:
237 return s.encode(_FSENCODING, "backslashreplace") 267 return s.encode(_FSENCODING, "backslashreplace")
260 except UnicodeError: 290 except UnicodeError:
261 return None 291 return None
262 return self._name 292 return self._name
263 293
264 @property 294 @property
295 def unpath(self):
296 """Always "real", strictly encoded Unicode or `None` if this is not
297 possible.
298
299 :rtype: text or None
300
301 """
302 if PY2:
303 if isinstance(self._npath, bytes):
304 try:
305 return self._npath.decode(_FSENCODING, "strict")
306 except UnicodeError:
307 return None
308 else:
309 return self._npath
310 else:
311 try:
312 self._npath.encode("utf-8", "strict")
313 except UnicodeError:
314 return None
315 return self._npath
316
317 @property
265 def upath(self): 318 def upath(self):
266 """Always "real", strictly encoded Unicode or `None` if this is not 319 """Always "real", strictly encoded Unicode or `None` if this is not
267 possible. 320 possible.
268 321
269 :rtype: text or None 322 :rtype: text or None
297 if (u'\n' in n) or (u'\r' in n) or (u'\t' in n) or (u'\\' in n): 350 if (u'\n' in n) or (u'\r' in n) or (u'\t' in n) or (u'\\' in n):
298 return None 351 return None
299 return n.encode("utf-8", "strict") 352 return n.encode("utf-8", "strict")
300 353
301 @property 354 @property
355 def u8npath(self):
356 """`.unpath` as UTF-8 or `None` (as strict as `unpath`).
357
358 Also do not allow TAB, CR, LF or backslash in the path.
359
360 """
361 p = self.unpath
362 if p is None:
363 return None
364 if (u'\n' in p) or (u'\r' in p) or (u'\t' in p) or (u'\\' in p):
365 return None
366 return p.encode("utf-8", "strict")
367
368 @property
302 def u8path(self): 369 def u8path(self):
303 """`.upath` as UTF-8 or `None` (as strict as `upath`). 370 """`.upath` as UTF-8 or `None` (as strict as `upath`).
304 371
305 Also do not allow TAB, CR or LF in the path. 372 Also do not allow TAB, CR or LF in the path.
306 373
315 @property 382 @property
316 def alt_u8name(self): 383 def alt_u8name(self):
317 if self._alt_u8name is _notset: 384 if self._alt_u8name is _notset:
318 self._alt_u8name = WalkDirEntry.alt_u8(self._name) 385 self._alt_u8name = WalkDirEntry.alt_u8(self._name)
319 return self._alt_u8name 386 return self._alt_u8name
387
388 @property
389 def alt_u8npath(self):
390 return WalkDirEntry.alt_u8(self._npath)
320 391
321 @property 392 @property
322 def alt_u8path(self): 393 def alt_u8path(self):
323 return WalkDirEntry.alt_u8(self._path) 394 return WalkDirEntry.alt_u8(self._path)
324 395
327 # 398 #
328 # Prevent double encoding ... 399 # Prevent double encoding ...
329 # ... and hope that the current UTF-8 is compatible 400 # ... and hope that the current UTF-8 is compatible
330 # with it 401 # with it
331 # 402 #
332 if isinstance(what, bytes): 403 s = escape_for_output(what)
333 s = (what.replace(b'\\', b"\\\\")
334 .replace(b'\n', b"\\x0a")
335 .replace(b'\r', b"\\x0d")
336 .replace(b'\t', b"\\x09"))
337 else:
338 s = (what.replace(u'\\', u"\\\\")
339 .replace(u'\n', u"\\x0a")
340 .replace(u'\r', u"\\x0d")
341 .replace(u'\t', u"\\x09"))
342 if PY2: 404 if PY2:
343 if isinstance(s, bytes): 405 if isinstance(s, bytes):
344 try: 406 try:
345 return (s.decode(_FSENCODING, "strict") 407 return (s.decode(_FSENCODING, "strict")
346 .encode("utf-8", "strict")) 408 .encode("utf-8", "strict"))
349 .encode("ascii", "backslashreplace")) 411 .encode("ascii", "backslashreplace"))
350 else: 412 else:
351 return s.encode("ascii", "backslashreplace") 413 return s.encode("ascii", "backslashreplace")
352 else: 414 else:
353 return s.encode("utf-8", "backslashreplace") 415 return s.encode("utf-8", "backslashreplace")
416
417 @staticmethod
418 def alt_bytes(what, use_utf8):
419 if not what:
420 return what
421 if use_utf8:
422 return WalkDirEntry.alt_u8(what)
423 else:
424 return WalkDirEntry.alt_fs(what)
425
426 @staticmethod
427 def alt_text(what, use_utf8):
428 b = WalkDirEntry.alt_bytes(what, use_utf8)
429 if PY2:
430 return b
431 return b.decode("iso-8859-1")
354 432
355 @property 433 @property
356 def is_symlink(self): 434 def is_symlink(self):
357 return self._is_symlink 435 return self._is_symlink
358 436