changeset 168:bcc4441cf216

Implement "--print-size" to also print file sizes and accumulated directory sizes. NOTE: - The resulting digests (file and directory) are NOT affected. - Sizes of symbolic links are not considered.
author Franz Glasner <fzglas.hg@dom66.de>
date Thu, 09 Jan 2025 18:02:46 +0100
parents ffd14e2de130
children 91b8b2a8aebc
files cutils/treesum.py
diffstat 1 files changed, 40 insertions(+), 12 deletions(-) [+]
line wrap: on
line diff
--- a/cutils/treesum.py	Thu Jan 09 14:17:01 2025 +0100
+++ b/cutils/treesum.py	Thu Jan 09 18:02:46 2025 +0100
@@ -108,6 +108,12 @@
             help="Do not follow symbolic links given on comment line "
                  "arguments. This is the default.")
         gp.add_argument(
+            "--print-size", action="store_true",
+            help="""Print the size of a file or the accumulated sizes of
+directory content into the output also.
+The size is not considered when computing digests. For symbolic links
+the size is not printed also.""")
+        gp.add_argument(
             "directories", nargs="*", metavar="DIRECTORY")
 
     parser = argparse.ArgumentParser(
@@ -206,7 +212,8 @@
                       mode=False,
                       mmap=None,
                       mtime=False,
-                      output=None):
+                      output=None,
+                      print_size=False):
     opts = argparse.Namespace(
         directories=directories,
         algorithm=(util.algotag2algotype(algorithm),
@@ -221,7 +228,8 @@
         metadata_full_mode=full_mode,
         metadata_mode=mode,
         metadata_mtime=mtime,
-        output=output)
+        output=output,
+        print_size=print_size)
     return opts
 
 
@@ -260,6 +268,7 @@
                 opts.metadata_mode,
                 opts.metadata_full_mode,
                 opts.metadata_mtime,
+                opts.print_size,
                 minimal=opts.minimal,
                 comment=opts.comment)
 
@@ -267,7 +276,7 @@
 def generate_treesum_for_directory(
         outfp, root, algorithm, use_mmap, use_base64, handle_root_logical,
         follow_directory_symlinks, with_metadata_mode, with_metadata_full_mode,
-        with_metadata_mtime,
+        with_metadata_mtime, print_size,
         minimal=None, comment=None):
     """
 
@@ -289,6 +298,8 @@
         flags.append("logical")
     if follow_directory_symlinks:
         flags.append("follow-directory-symlinks")
+    if print_size:
+        flags.append("print-size")
     if flags:
         outfp.write(format_bsd_line("FLAGS", ",".join(flags), None, False))
         outfp.flush()
@@ -336,6 +347,8 @@
             root,
             follow_symlinks=follow_directory_symlinks):
         dir_dgst = algorithm[0]()
+        dir_size = 0
+
         for fso in fsobjects:
             if fso.is_dir:
                 if fso.is_symlink and not follow_directory_symlinks:
@@ -358,7 +371,8 @@
                     outfp.flush()
                     continue
                 # fetch from dir_digests
-                dgst = dir_digests[top + (fso.name,)]
+                dgst, dsz = dir_digests[top + (fso.name,)]
+                dir_size += dsz
                 dir_dgst.update(b"1:d,%d:%s," % (len(fso.fsname), fso.fsname))
                 dir_dgst.update(
                     b"%d:%s," % (len(dgst), dgst))
@@ -375,6 +389,7 @@
                     dir_dgst.update(b"4:mode,%d:%s," % (len(modestr), modestr))
             else:
                 dir_dgst.update(b"1:f,%d:%s," % (len(fso.fsname), fso.fsname))
+                dir_size += fso.stat.st_size
                 if with_metadata_mtime:
                     mtime = datetime.datetime.utcfromtimestamp(
                         int(fso.stat.st_mtime))
@@ -397,15 +412,25 @@
                     algorithm[0], fso.path, use_mmap=use_mmap)
                 dir_dgst.update(b"%d:%s," % (len(dgst), dgst))
                 opath = "/".join(top) + "/" + fso.name if top else fso.name
-                outfp.write(
-                    format_bsd_line(
-                        algorithm[1], dgst, opath, use_base64))
+                if print_size:
+                    outfp.write(
+                        format_bsd_line(
+                            algorithm[1], dgst, opath, use_base64,
+                            fso.stat.st_size))
+                else:
+                    outfp.write(
+                        format_bsd_line(
+                            algorithm[1], dgst, opath, use_base64))
                 outfp.flush()
         opath = "/".join(top) + "/" if top else ""
-        outfp.write(format_bsd_line(
-            algorithm[1], dir_dgst.digest(), opath, use_base64))
+        if print_size:
+            outfp.write(format_bsd_line(
+                algorithm[1], dir_dgst.digest(), opath, use_base64, dir_size))
+        else:
+            outfp.write(format_bsd_line(
+                algorithm[1], dir_dgst.digest(), opath, use_base64))
         outfp.flush()
-        dir_digests[top] = dir_dgst.digest()
+        dir_digests[top] = (dir_dgst.digest(), dir_size)
 
 
 def normalized_compatible_mode_str(mode):
@@ -424,7 +449,7 @@
     return modestr
 
 
-def format_bsd_line(digestname, value, filename, use_base64):
+def format_bsd_line(digestname, value, filename, use_base64, size=None):
     ls = os.linesep if isinstance(os.linesep, bytes) \
         else os.linesep.encode("utf-8")
     if not isinstance(digestname, bytes):
@@ -452,7 +477,10 @@
         value = binascii.hexlify(value)
     if filename != b"./@":
         filename = util.normalize_filename(filename, True)
-    return b"%s (%s) = %s%s" % (digestname, filename, value, ls)
+    if size is None:
+        return b"%s (%s) = %s%s" % (digestname, filename, value, ls)
+    else:
+        return b"%s (%s) = %s,%d%s" % (digestname, filename, value, size, ls)
 
 
 if __name__ == "__main__":