changeset 202:b9b38584919b

First preparations to implement a UTF-8 mode for treeview
author Franz Glasner <fzglas.hg@dom66.de>
date Tue, 21 Jan 2025 14:51:58 +0100
parents 58d93453c307
children 3a85f7bbe0b1
files cutils/treesum.py
diffstat 1 files changed, 27 insertions(+), 10 deletions(-) [+]
line wrap: on
line diff
--- a/cutils/treesum.py	Tue Jan 21 14:30:06 2025 +0100
+++ b/cutils/treesum.py	Tue Jan 21 14:51:58 2025 +0100
@@ -123,6 +123,11 @@
             help="""Print only the size of files and for each directory its
 accumulated directory size. Digests are not computed.""")
         gp.add_argument(
+            "--utf8", "--utf-8", action="store_true",
+            help="""Encode all file paths using UTF-8 instead of
+the filesystem encoding. Add some error tag into the path if it cannot
+representated in Unicode cleanly.""")
+        gp.add_argument(
             "directories", nargs="*", metavar="DIRECTORY")
 
     def _populate_info_arguments(ip):
@@ -251,7 +256,8 @@
                       mtime=False,
                       output=None,
                       print_size=False,
-                      size_only=False):
+                      size_only=False,
+                      utf8=False):
     opts = argparse.Namespace(
         directories=directories,
         algorithm=util.argv2algo(algorithm),
@@ -267,7 +273,8 @@
         metadata_mtime=mtime,
         output=output,
         print_size=print_size,
-        size_only=size_only)
+        size_only=size_only,
+        utf8=utf8)
     return opts
 
 
@@ -312,13 +319,14 @@
         for d in opts.directories:
 
             V1DirectoryTreesumGenerator(
-                opts.algorithm, opts.mmap, opts.base64, opts.logical,
-                opts.follow_directory_symlinks,
+                opts.algorithm, opts.mmap, opts.base64,
+                opts.logical, opts.follow_directory_symlinks,
                 opts.metadata_mode,
                 opts.metadata_full_mode,
                 opts.metadata_mtime,
                 opts.size_only,
                 opts.print_size,
+                opts.utf8,
                 minimal=opts.minimal).generate(
                     outfp, d, comment=opts.comment)
 
@@ -328,7 +336,7 @@
     def __init__(self, algorithm, use_mmap, use_base64,
                  handle_root_logical, follow_directory_symlinks,
                  with_metadata_mode, with_metadata_full_mode,
-                 with_metadata_mtime, size_only, print_size,
+                 with_metadata_mtime, size_only, print_size, utf8_mode,
                  minimal=None,):
         super(V1DirectoryTreesumGenerator, self).__init__()
         self._algorithm = algorithm
@@ -341,6 +349,7 @@
         self._with_metadata_mtime = with_metadata_mtime
         self._size_only = size_only
         self._print_size = print_size
+        self._utf8_mode = utf8_mode
         self._minimal = minimal
 
     def generate(self, outfp, root, comment=None):
@@ -373,6 +382,8 @@
             flags.append("follow-directory-symlinks")
         if self._size_only:
             flags.append("size-only")
+        if self._utf8_mode:
+            flags.append("utf8-mode")
         else:
             if self._print_size:
                 flags.append("print-size")
@@ -401,10 +412,11 @@
         self._outfp.flush()
 
         if not self._handle_root_logical and os.path.islink(root):
-            linktgt = util.fsencode(os.readlink(root))
+            linktgt = walk.WalkDirEntry.from_readlink(os.readlink(root))
             linkdgst = self._algorithm[0]()
             linkdgst.update(
-                util.interpolate_bytes(b"%d:%s,", len(linktgt), linktgt))
+                util.interpolate_bytes(
+                    b"%d:%s,", len(linktgt.fspath), linktgt.fspath))
             dir_dgst = self._algorithm[0]()
             dir_dgst.update(b"1:L,")
             dir_dgst.update(
@@ -439,17 +451,22 @@
         path = os.path.join(root, *top) if top else root
         with walk.ScanDir(path) as dirscan:
             fsobjects = list(dirscan)
-        fsobjects.sort(key=walk.WalkDirEntry.sort_key)
+        if self._utf8_mode:
+            fsobjects.sort(key=walk.WalkDirEntry.alt_sort_key)
+        else:
+            fsobjects.sort(key=walk.WalkDirEntry.sort_key)
         dir_dgst = self._algorithm[0]()
         dir_size = 0
         for fso in fsobjects:
             if fso.is_dir:
                 if fso.is_symlink and not self._follow_directory_symlinks:
-                    linktgt = util.fsencode(os.readlink(fso.path))
+                    linktgt = walk.WalkDirEntry.from_readlink(
+                        os.readlink(fso.path))
+                    # linktgt = util.fsencode(os.readlink(fso.path)))
                     linkdgst = self._algorithm[0]()
                     linkdgst.update(
                         util.interpolate_bytes(
-                            b"%d:%s,", len(linktgt), linktgt))
+                            b"%d:%s,", len(linktgt.fspath), linktgt.fspath))
                     dir_dgst.update(util.interpolate_bytes(
                         b"1:S,%d:%s,", len(fso.fsname), fso.fsname))
                     #