diff cutils/treesum.py @ 270:42f4ca423ab3

treesum: REFACTOR: Major refactoring of computing digests: - Digests that are created by reading symlink targets are created as if the result of readlink() is the content of a regular file: no special tagging is done now. - All digest values have their canonical algorithm name prepended. - Normalize the order of computation.
author Franz Glasner <fzglas.hg@dom66.de>
date Tue, 18 Feb 2025 16:07:28 +0100
parents fc002983253c
children 6fe88de236cb
line wrap: on
line diff
--- a/cutils/treesum.py	Tue Feb 18 13:10:05 2025 +0100
+++ b/cutils/treesum.py	Tue Feb 18 16:07:28 2025 +0100
@@ -551,14 +551,14 @@
         if not self._follow_symlinks.command_line and os.path.islink(root):
             linktgt = walk.WalkDirEntry.from_readlink(os.readlink(root))
             linkdgst = self._algorithm[0]()
-            linkdgst.update(
-                util.interpolate_bytes(
-                    b"%d:%s,", len(linktgt.fspath), linktgt.fspath))
+            linkdgst.update(linktgt.fspath)
             dir_dgst = self._algorithm[0]()
-            dir_dgst.update(b"1:L,")
+            dir_dgst.update(b"2:L@,")
             dir_dgst.update(
                 util.interpolate_bytes(
-                    b"%d:%s,", len(linkdgst.digest()), linkdgst.digest()))
+                    b"%d:%s,%d:%s,",
+                    len(self._algorithm[1]), util.b(self._algorithm[1]),
+                    len(linkdgst.digest()), linkdgst.digest()))
             if self._size_only:
                 self._writer.write_size(b"./@/", 0)
             else:
@@ -627,51 +627,40 @@
                     if self._utf8_mode:
                         if linktgt.u8path is None:
                             dir_tainted = True
-                            linkdgst.update(util.interpolate_bytes(
-                                b"%d:%s,",
-                                len(linktgt.alt_u8path),
-                                linktgt.alt_u8path))
+                            linkdgst.update(linktgt.alt_u8path)
                         else:
-                            linkdgst.update(util.interpolate_bytes(
-                                b"%d:%s,",
-                                len(linktgt.u8path),
-                                linktgt.u8path))
+                            linkdgst.update(linktgt.u8path)
                         if fso.u8name is None:
                             dir_tainted = True
                             dir_dgst.update(util.interpolate_bytes(
-                                b"1:S,%d:%s,",
+                                b"2:@/,%d:%s,",
                                 len(fso.alt_u8name),
                                 fso.alt_u8name))
                         else:
                             dir_dgst.update(util.interpolate_bytes(
-                                b"1:S,%d:%s,", len(fso.u8name), fso.u8name))
+                                b"2:@/,%d:%s,", len(fso.u8name), fso.u8name))
                     else:
                         if linktgt.fspath is None:
                             dir_tainted = True
-                            linkdgst.update(util.interpolate_bytes(
-                                b"%d:%s,",
-                                len(linktgt.alt_fspath),
-                                linktgt.alt_fspath))
+                            linkdgst.update(linktgt.alt_fspath)
                         else:
-                            linkdgst.update(util.interpolate_bytes(
-                                b"%d:%s,",
-                                len(linktgt.fspath),
-                                linktgt.fspath))
+                            linkdgst.update(linktgt.fspath)
                         if fso.fsname is None:
                             dir_tainted = True
                             dir_dgst.update(util.interpolate_bytes(
-                                b"1:S,%d:%s,",
+                                b"2:@/,%d:%s,",
                                 len(fso.alt_fsname),
                                 fso.alt_fsname))
                         else:
                             dir_dgst.update(util.interpolate_bytes(
-                                b"1:S,%d:%s,", len(fso.fsname), fso.fsname))
+                                b"2:@/,%d:%s,", len(fso.fsname), fso.fsname))
                     #
                     # - no mtime and no mode for symlinks
                     # - also does not count for dir_size
                     #
                     dir_dgst.update(util.interpolate_bytes(
-                        b"%d:%s,",
+                        b"%d:%s,%d:%s,",
+                        len(self._algorithm[1]), util.b(self._algorithm[1]),
                         len(linkdgst.digest()), linkdgst.digest()))
                     opath = join_output_path(top, fso.name)
                     if self._utf8_mode:
@@ -712,24 +701,22 @@
                         if fso.u8name is None:
                             dir_tainted = True
                             dir_dgst.update(util.interpolate_bytes(
-                                b"1:d,%d:%s,",
+                                b"1:/,%d:%s,",
                                 len(fso.alt_u8name),
                                 fso.alt_u8name))
                         else:
                             dir_dgst.update(util.interpolate_bytes(
-                                b"1:d,%d:%s,", len(fso.u8name), fso.u8name))
+                                b"1:/,%d:%s,", len(fso.u8name), fso.u8name))
                     else:
                         if fso.fsname is None:
                             dir_tainted = True
                             dir_dgst.update(util.interpolate_bytes(
-                                b"1:d,%d:%s,",
+                                b"1:/,%d:%s,",
                                 len(fso.alt_fsname),
                                 fso.alt_fsname))
                         else:
                             dir_dgst.update(util.interpolate_bytes(
-                                b"1:d,%d:%s,", len(fso.fsname), fso.fsname))
-                    dir_dgst.update(util.interpolate_bytes(
-                        b"%d:%s,", len(sub_dir_dgst), sub_dir_dgst))
+                                b"1:/,%d:%s,", len(fso.fsname), fso.fsname))
                     if self._with_metadata_full_mode:
                         modestr = util.b(normalized_mode_str(fso.stat.st_mode))
                         dir_dgst.update(util.interpolate_bytes(
@@ -739,8 +726,16 @@
                             fso.stat.st_mode))
                         dir_dgst.update(util.interpolate_bytes(
                             b"4:mode,%d:%s,", len(modestr), modestr))
+                    dir_dgst.update(util.interpolate_bytes(
+                        b"%d:%s,%d:%s,",
+                        len(sub_dir_algo), util.b(sub_dir_algo),
+                        len(sub_dir_dgst), sub_dir_dgst))
             else:
                 if fso.is_symlink and not self._follow_symlinks.file:
+                    #
+                    # Symbolic link to some filesystem object which is not
+                    # determined to be a link to a directory.
+                    #
                     linktgt = walk.WalkDirEntry.from_readlink(
                         os.readlink(fso.path))
                     # linktgt = util.fsencode(os.readlink(fso.path)))
@@ -748,51 +743,40 @@
                     if self._utf8_mode:
                         if linktgt.u8path is None:
                             dir_tainted = True
-                            linkdgst.update(util.interpolate_bytes(
-                                b"%d:%s,",
-                                len(linktgt.alt_u8path),
-                                linktgt.alt_u8path))
+                            linkdgst.update(linktgt.alt_u8path)
                         else:
-                            linkdgst.update(util.interpolate_bytes(
-                                b"%d:%s,",
-                                len(linktgt.u8path),
-                                linktgt.u8path))
+                            linkdgst.update(linktgt.u8path)
                         if fso.u8name is None:
                             dir_tainted = True
                             dir_dgst.update(util.interpolate_bytes(
-                                b"1:F,%d:%s,",
+                                b"1:@,%d:%s,",
                                 len(fso.alt_u8name),
                                 fso.alt_u8name))
                         else:
                             dir_dgst.update(util.interpolate_bytes(
-                                b"1:F,%d:%s,", len(fso.u8name), fso.u8name))
+                                b"1:@,%d:%s,", len(fso.u8name), fso.u8name))
                     else:
                         if linktgt.fspath is None:
                             dir_tainted = True
-                            linkdgst.update(util.interpolate_bytes(
-                                b"%d:%s,",
-                                len(linktgt.alt_fspath),
-                                linktgt.alt_fspath))
+                            linkdgst.update(linktgt.alt_fspath)
                         else:
-                            linkdgst.update(util.interpolate_bytes(
-                                b"%d:%s,",
-                                len(linktgt.fspath),
-                                linktgt.fspath))
+                            linkdgst.update(linktgt.fspath)
                         if fso.fsname is None:
                             dir_tainted = True
                             dir_dgst.update(util.interpolate_bytes(
-                                b"1:F,%d:%s,",
+                                b"1:@,%d:%s,",
                                 len(fso.alt_fsname),
                                 fso.alt_fsname))
                         else:
                             dir_dgst.update(util.interpolate_bytes(
-                                b"1:F,%d:%s,", len(fso.fsname), fso.fsname))
+                                b"1:@,%d:%s,", len(fso.fsname), fso.fsname))
                     #
                     # - no mtime and no mode for symlinks
                     # - also does not count for dir_size
                     #
                     dir_dgst.update(util.interpolate_bytes(
-                        b"%d:%s,",
+                        b"%d:%s,%d:%s,",
+                        len(self._algorithm[1]), util.b(self._algorithm[1]),
                         len(linkdgst.digest()), linkdgst.digest()))
                     opath = join_output_path(top, fso.name)
                     if self._utf8_mode:
@@ -818,22 +802,22 @@
                         if fso.u8name is None:
                             dir_tainted = True
                             dir_dgst.update(util.interpolate_bytes(
-                                b"1:f,%d:%s,",
+                                b"0:,%d:%s,",
                                 len(fso.alt_u8name),
                                 fso.alt_u8name))
                         else:
                             dir_dgst.update(util.interpolate_bytes(
-                                b"1:f,%d:%s,", len(fso.u8name), fso.u8name))
+                                b"0:,%d:%s,", len(fso.u8name), fso.u8name))
                     else:
                         if fso.fsname is None:
                             dir_tainted = True
                             dir_dgst.update(util.interpolate_bytes(
-                                b"1:f,%d:%s,",
+                                b"0:,%d:%s,",
                                 len(fso.alt_fsname),
                                 fso.alt_fsname))
                         else:
                             dir_dgst.update(util.interpolate_bytes(
-                                b"1:f,%d:%s,", len(fso.fsname), fso.fsname))
+                                b"0:,%d:%s,", len(fso.fsname), fso.fsname))
                     opath = join_output_path(top, fso.name)
                     if self._utf8_mode:
                         opath = walk.WalkDirEntry.alt_u8(opath)
@@ -890,7 +874,11 @@
                                 fso.path,
                                 use_mmap=self._use_mmap)
                             dir_dgst.update(util.interpolate_bytes(
-                                b"%d:%s,", len(dgst), dgst))
+                                b"%d:%s,%d:%s,",
+                                len(self._algorithm[1]),
+                                util.b(self._algorithm[1]),
+                                len(dgst),
+                                dgst))
                         if self._size_only:
                             self._writer.write_size(opath, fso.stat.st_size)
                         else: