Mercurial > hgrepos > Python > apps > py-cutils
diff cutils/treesum.py @ 270:42f4ca423ab3
treesum: REFACTOR: Major refactoring of computing digests:
- Digests that are created by reading symlink targets are created as if the
result of readlink() is the content of a regular file: no special
tagging is done now.
- All digest values have their canonical algorithm name prepended.
- Normalize the order of computation
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Tue, 18 Feb 2025 16:07:28 +0100 |
| parents | fc002983253c |
| children | 6fe88de236cb |
line wrap: on
line diff
--- a/cutils/treesum.py Tue Feb 18 13:10:05 2025 +0100 +++ b/cutils/treesum.py Tue Feb 18 16:07:28 2025 +0100 @@ -551,14 +551,14 @@ if not self._follow_symlinks.command_line and os.path.islink(root): linktgt = walk.WalkDirEntry.from_readlink(os.readlink(root)) linkdgst = self._algorithm[0]() - linkdgst.update( - util.interpolate_bytes( - b"%d:%s,", len(linktgt.fspath), linktgt.fspath)) + linkdgst.update(linktgt.fspath) dir_dgst = self._algorithm[0]() - dir_dgst.update(b"1:L,") + dir_dgst.update(b"2:L@,") dir_dgst.update( util.interpolate_bytes( - b"%d:%s,", len(linkdgst.digest()), linkdgst.digest())) + b"%d:%s,%d:%s,", + len(self._algorithm[1]), util.b(self._algorithm[1]), + len(linkdgst.digest()), linkdgst.digest())) if self._size_only: self._writer.write_size(b"./@/", 0) else: @@ -627,51 +627,40 @@ if self._utf8_mode: if linktgt.u8path is None: dir_tainted = True - linkdgst.update(util.interpolate_bytes( - b"%d:%s,", - len(linktgt.alt_u8path), - linktgt.alt_u8path)) + linkdgst.update(linktgt.alt_u8path) else: - linkdgst.update(util.interpolate_bytes( - b"%d:%s,", - len(linktgt.u8path), - linktgt.u8path)) + linkdgst.update(linktgt.u8path) if fso.u8name is None: dir_tainted = True dir_dgst.update(util.interpolate_bytes( - b"1:S,%d:%s,", + b"2:@/,%d:%s,", len(fso.alt_u8name), fso.alt_u8name)) else: dir_dgst.update(util.interpolate_bytes( - b"1:S,%d:%s,", len(fso.u8name), fso.u8name)) + b"2:@/,%d:%s,", len(fso.u8name), fso.u8name)) else: if linktgt.fspath is None: dir_tainted = True - linkdgst.update(util.interpolate_bytes( - b"%d:%s,", - len(linktgt.alt_fspath), - linktgt.alt_fspath)) + linkdgst.update(linktgt.alt_fspath) else: - linkdgst.update(util.interpolate_bytes( - b"%d:%s,", - len(linktgt.fspath), - linktgt.fspath)) + linkdgst.update(linktgt.fspath) if fso.fsname is None: dir_tainted = True dir_dgst.update(util.interpolate_bytes( - b"1:S,%d:%s,", + b"2:@/,%d:%s,", len(fso.alt_fsname), fso.alt_fsname)) else: dir_dgst.update(util.interpolate_bytes( - 
b"1:S,%d:%s,", len(fso.fsname), fso.fsname)) + b"2:@/,%d:%s,", len(fso.fsname), fso.fsname)) # # - no mtime and no mode for symlinks # - also does not count for dir_size # dir_dgst.update(util.interpolate_bytes( - b"%d:%s,", + b"%d:%s,%d:%s,", + len(self._algorithm[1]), util.b(self._algorithm[1]), len(linkdgst.digest()), linkdgst.digest())) opath = join_output_path(top, fso.name) if self._utf8_mode: @@ -712,24 +701,22 @@ if fso.u8name is None: dir_tainted = True dir_dgst.update(util.interpolate_bytes( - b"1:d,%d:%s,", + b"1:/,%d:%s,", len(fso.alt_u8name), fso.alt_u8name)) else: dir_dgst.update(util.interpolate_bytes( - b"1:d,%d:%s,", len(fso.u8name), fso.u8name)) + b"1:/,%d:%s,", len(fso.u8name), fso.u8name)) else: if fso.fsname is None: dir_tainted = True dir_dgst.update(util.interpolate_bytes( - b"1:d,%d:%s,", + b"1:/,%d:%s,", len(fso.alt_fsname), fso.alt_fsname)) else: dir_dgst.update(util.interpolate_bytes( - b"1:d,%d:%s,", len(fso.fsname), fso.fsname)) - dir_dgst.update(util.interpolate_bytes( - b"%d:%s,", len(sub_dir_dgst), sub_dir_dgst)) + b"1:/,%d:%s,", len(fso.fsname), fso.fsname)) if self._with_metadata_full_mode: modestr = util.b(normalized_mode_str(fso.stat.st_mode)) dir_dgst.update(util.interpolate_bytes( @@ -739,8 +726,16 @@ fso.stat.st_mode)) dir_dgst.update(util.interpolate_bytes( b"4:mode,%d:%s,", len(modestr), modestr)) + dir_dgst.update(util.interpolate_bytes( + b"%d:%s,%d:%s,", + len(sub_dir_algo), util.b(sub_dir_algo), + len(sub_dir_dgst), sub_dir_dgst)) else: if fso.is_symlink and not self._follow_symlinks.file: + # + # Symbolic link to some filesystem object which is not + # determined to be a link to a directory. 
+ # linktgt = walk.WalkDirEntry.from_readlink( os.readlink(fso.path)) # linktgt = util.fsencode(os.readlink(fso.path))) @@ -748,51 +743,40 @@ if self._utf8_mode: if linktgt.u8path is None: dir_tainted = True - linkdgst.update(util.interpolate_bytes( - b"%d:%s,", - len(linktgt.alt_u8path), - linktgt.alt_u8path)) + linkdgst.update(linktgt.alt_u8path) else: - linkdgst.update(util.interpolate_bytes( - b"%d:%s,", - len(linktgt.u8path), - linktgt.u8path)) + linkdgst.update(linktgt.u8path) if fso.u8name is None: dir_tainted = True dir_dgst.update(util.interpolate_bytes( - b"1:F,%d:%s,", + b"1:@,%d:%s,", len(fso.alt_u8name), fso.alt_u8name)) else: dir_dgst.update(util.interpolate_bytes( - b"1:F,%d:%s,", len(fso.u8name), fso.u8name)) + b"1:@,%d:%s,", len(fso.u8name), fso.u8name)) else: if linktgt.fspath is None: dir_tainted = True - linkdgst.update(util.interpolate_bytes( - b"%d:%s,", - len(linktgt.alt_fspath), - linktgt.alt_fspath)) + linkdgst.update(linktgt.alt_fspath) else: - linkdgst.update(util.interpolate_bytes( - b"%d:%s,", - len(linktgt.fspath), - linktgt.fspath)) + linkdgst.update(linktgt.fspath) if fso.fsname is None: dir_tainted = True dir_dgst.update(util.interpolate_bytes( - b"1:F,%d:%s,", + b"1:@,%d:%s,", len(fso.alt_fsname), fso.alt_fsname)) else: dir_dgst.update(util.interpolate_bytes( - b"1:F,%d:%s,", len(fso.fsname), fso.fsname)) + b"1:@,%d:%s,", len(fso.fsname), fso.fsname)) # # - no mtime and no mode for symlinks # - also does not count for dir_size # dir_dgst.update(util.interpolate_bytes( - b"%d:%s,", + b"%d:%s,%d:%s,", + len(self._algorithm[1]), util.b(self._algorithm[1]), len(linkdgst.digest()), linkdgst.digest())) opath = join_output_path(top, fso.name) if self._utf8_mode: @@ -818,22 +802,22 @@ if fso.u8name is None: dir_tainted = True dir_dgst.update(util.interpolate_bytes( - b"1:f,%d:%s,", + b"0:,%d:%s,", len(fso.alt_u8name), fso.alt_u8name)) else: dir_dgst.update(util.interpolate_bytes( - b"1:f,%d:%s,", len(fso.u8name), fso.u8name)) + 
b"0:,%d:%s,", len(fso.u8name), fso.u8name)) else: if fso.fsname is None: dir_tainted = True dir_dgst.update(util.interpolate_bytes( - b"1:f,%d:%s,", + b"0:,%d:%s,", len(fso.alt_fsname), fso.alt_fsname)) else: dir_dgst.update(util.interpolate_bytes( - b"1:f,%d:%s,", len(fso.fsname), fso.fsname)) + b"0:,%d:%s,", len(fso.fsname), fso.fsname)) opath = join_output_path(top, fso.name) if self._utf8_mode: opath = walk.WalkDirEntry.alt_u8(opath) @@ -890,7 +874,11 @@ fso.path, use_mmap=self._use_mmap) dir_dgst.update(util.interpolate_bytes( - b"%d:%s,", len(dgst), dgst)) + b"%d:%s,%d:%s,", + len(self._algorithm[1]), + util.b(self._algorithm[1]), + len(dgst), + dgst)) if self._size_only: self._writer.write_size(opath, fso.stat.st_size) else:
