comparison cutils/treesum.py @ 202:b9b38584919b

First preparations to implement a UTF-8 mode for treeview
author Franz Glasner <fzglas.hg@dom66.de>
date Tue, 21 Jan 2025 14:51:58 +0100
parents 22f92bf3572c
children 07f1d79e6674
comparison
equal deleted inserted replaced
201:58d93453c307 202:b9b38584919b
120 the size is not printed also.""") 120 the size is not printed also.""")
121 gp.add_argument( 121 gp.add_argument(
122 "--size-only", action="store_true", 122 "--size-only", action="store_true",
123 help="""Print only the size of files and for each directory its 123 help="""Print only the size of files and for each directory its
124 accumulated directory size. Digests are not computed.""") 124 accumulated directory size. Digests are not computed.""")
125 gp.add_argument(
126 "--utf8", "--utf-8", action="store_true",
127 help="""Encode all file paths using UTF-8 instead of
128 the filesystem encoding. Add some error tag into the path if it cannot be
129 represented in Unicode cleanly.""")
125 gp.add_argument( 130 gp.add_argument(
126 "directories", nargs="*", metavar="DIRECTORY") 131 "directories", nargs="*", metavar="DIRECTORY")
127 132
128 def _populate_info_arguments(ip): 133 def _populate_info_arguments(ip):
129 ip.add_argument( 134 ip.add_argument(
249 mode=False, 254 mode=False,
250 mmap=None, 255 mmap=None,
251 mtime=False, 256 mtime=False,
252 output=None, 257 output=None,
253 print_size=False, 258 print_size=False,
254 size_only=False): 259 size_only=False,
260 utf8=False):
255 opts = argparse.Namespace( 261 opts = argparse.Namespace(
256 directories=directories, 262 directories=directories,
257 algorithm=util.argv2algo(algorithm), 263 algorithm=util.argv2algo(algorithm),
258 append_output=append_output, 264 append_output=append_output,
259 base64=base64, 265 base64=base64,
265 metadata_full_mode=full_mode, 271 metadata_full_mode=full_mode,
266 metadata_mode=mode, 272 metadata_mode=mode,
267 metadata_mtime=mtime, 273 metadata_mtime=mtime,
268 output=output, 274 output=output,
269 print_size=print_size, 275 print_size=print_size,
270 size_only=size_only) 276 size_only=size_only,
277 utf8=utf8)
271 return opts 278 return opts
272 279
273 280
274 def gen_info_opts(digest_files=[], last=False): 281 def gen_info_opts(digest_files=[], last=False):
275 opts = argparse.Namespace( 282 opts = argparse.Namespace(
310 317
311 with out_cm as outfp: 318 with out_cm as outfp:
312 for d in opts.directories: 319 for d in opts.directories:
313 320
314 V1DirectoryTreesumGenerator( 321 V1DirectoryTreesumGenerator(
315 opts.algorithm, opts.mmap, opts.base64, opts.logical, 322 opts.algorithm, opts.mmap, opts.base64,
316 opts.follow_directory_symlinks, 323 opts.logical, opts.follow_directory_symlinks,
317 opts.metadata_mode, 324 opts.metadata_mode,
318 opts.metadata_full_mode, 325 opts.metadata_full_mode,
319 opts.metadata_mtime, 326 opts.metadata_mtime,
320 opts.size_only, 327 opts.size_only,
321 opts.print_size, 328 opts.print_size,
329 opts.utf8,
322 minimal=opts.minimal).generate( 330 minimal=opts.minimal).generate(
323 outfp, d, comment=opts.comment) 331 outfp, d, comment=opts.comment)
324 332
325 333
326 class V1DirectoryTreesumGenerator(object): 334 class V1DirectoryTreesumGenerator(object):
327 335
328 def __init__(self, algorithm, use_mmap, use_base64, 336 def __init__(self, algorithm, use_mmap, use_base64,
329 handle_root_logical, follow_directory_symlinks, 337 handle_root_logical, follow_directory_symlinks,
330 with_metadata_mode, with_metadata_full_mode, 338 with_metadata_mode, with_metadata_full_mode,
331 with_metadata_mtime, size_only, print_size, 339 with_metadata_mtime, size_only, print_size, utf8_mode,
332 minimal=None,): 340 minimal=None,):
333 super(V1DirectoryTreesumGenerator, self).__init__() 341 super(V1DirectoryTreesumGenerator, self).__init__()
334 self._algorithm = algorithm 342 self._algorithm = algorithm
335 self._use_mmap = use_mmap 343 self._use_mmap = use_mmap
336 self._use_base64 = use_base64 344 self._use_base64 = use_base64
339 self._with_metadata_mode = with_metadata_mode 347 self._with_metadata_mode = with_metadata_mode
340 self._with_metadata_full_mode = with_metadata_full_mode 348 self._with_metadata_full_mode = with_metadata_full_mode
341 self._with_metadata_mtime = with_metadata_mtime 349 self._with_metadata_mtime = with_metadata_mtime
342 self._size_only = size_only 350 self._size_only = size_only
343 self._print_size = print_size 351 self._print_size = print_size
352 self._utf8_mode = utf8_mode
344 self._minimal = minimal 353 self._minimal = minimal
345 354
346 def generate(self, outfp, root, comment=None): 355 def generate(self, outfp, root, comment=None):
347 """ 356 """
348 357
371 flags.append("logical") 380 flags.append("logical")
372 if self._follow_directory_symlinks: 381 if self._follow_directory_symlinks:
373 flags.append("follow-directory-symlinks") 382 flags.append("follow-directory-symlinks")
374 if self._size_only: 383 if self._size_only:
375 flags.append("size-only") 384 flags.append("size-only")
385 if self._utf8_mode:
386 flags.append("utf8-mode")
376 else: 387 else:
377 if self._print_size: 388 if self._print_size:
378 flags.append("print-size") 389 flags.append("print-size")
379 if flags: 390 if flags:
380 flags.sort() 391 flags.sort()
399 else: 410 else:
400 self._outfp.write(format_bsd_line("ROOT", None, root, False)) 411 self._outfp.write(format_bsd_line("ROOT", None, root, False))
401 self._outfp.flush() 412 self._outfp.flush()
402 413
403 if not self._handle_root_logical and os.path.islink(root): 414 if not self._handle_root_logical and os.path.islink(root):
404 linktgt = util.fsencode(os.readlink(root)) 415 linktgt = walk.WalkDirEntry.from_readlink(os.readlink(root))
405 linkdgst = self._algorithm[0]() 416 linkdgst = self._algorithm[0]()
406 linkdgst.update( 417 linkdgst.update(
407 util.interpolate_bytes(b"%d:%s,", len(linktgt), linktgt)) 418 util.interpolate_bytes(
419 b"%d:%s,", len(linktgt.fspath), linktgt.fspath))
408 dir_dgst = self._algorithm[0]() 420 dir_dgst = self._algorithm[0]()
409 dir_dgst.update(b"1:L,") 421 dir_dgst.update(b"1:L,")
410 dir_dgst.update( 422 dir_dgst.update(
411 util.interpolate_bytes( 423 util.interpolate_bytes(
412 b"%d:%s,", len(linkdgst.digest()), linkdgst.digest())) 424 b"%d:%s,", len(linkdgst.digest()), linkdgst.digest()))
437 def _generate(self, root, top): 449 def _generate(self, root, top):
438 logging.debug("Handling %s/%r", root, top) 450 logging.debug("Handling %s/%r", root, top)
439 path = os.path.join(root, *top) if top else root 451 path = os.path.join(root, *top) if top else root
440 with walk.ScanDir(path) as dirscan: 452 with walk.ScanDir(path) as dirscan:
441 fsobjects = list(dirscan) 453 fsobjects = list(dirscan)
442 fsobjects.sort(key=walk.WalkDirEntry.sort_key) 454 if self._utf8_mode:
455 fsobjects.sort(key=walk.WalkDirEntry.alt_sort_key)
456 else:
457 fsobjects.sort(key=walk.WalkDirEntry.sort_key)
443 dir_dgst = self._algorithm[0]() 458 dir_dgst = self._algorithm[0]()
444 dir_size = 0 459 dir_size = 0
445 for fso in fsobjects: 460 for fso in fsobjects:
446 if fso.is_dir: 461 if fso.is_dir:
447 if fso.is_symlink and not self._follow_directory_symlinks: 462 if fso.is_symlink and not self._follow_directory_symlinks:
448 linktgt = util.fsencode(os.readlink(fso.path)) 463 linktgt = walk.WalkDirEntry.from_readlink(
464 os.readlink(fso.path))
465 # linktgt = util.fsencode(os.readlink(fso.path)))
449 linkdgst = self._algorithm[0]() 466 linkdgst = self._algorithm[0]()
450 linkdgst.update( 467 linkdgst.update(
451 util.interpolate_bytes( 468 util.interpolate_bytes(
452 b"%d:%s,", len(linktgt), linktgt)) 469 b"%d:%s,", len(linktgt.fspath), linktgt.fspath))
453 dir_dgst.update(util.interpolate_bytes( 470 dir_dgst.update(util.interpolate_bytes(
454 b"1:S,%d:%s,", len(fso.fsname), fso.fsname)) 471 b"1:S,%d:%s,", len(fso.fsname), fso.fsname))
455 # 472 #
456 # - no mtime and no mode for symlinks 473 # - no mtime and no mode for symlinks
457 # - also does not count for dir_size 474 # - also does not count for dir_size