changeset 308:652870b20f9e

treesum: Implement --accept-treesum: trust a treesum-file for a directory checksum
author Franz Glasner <fzglas.hg@dom66.de>
date Sat, 08 Mar 2025 04:49:06 +0100
parents 64df94bf4659
children 553d6f7309d9
files cutils/treesum.py cutils/util/fnmatch.py
diffstat 2 files changed, 172 insertions(+), 13 deletions(-) [+]
line wrap: on
line diff
--- a/cutils/treesum.py	Fri Mar 07 14:22:22 2025 +0100
+++ b/cutils/treesum.py	Sat Mar 08 04:49:06 2025 +0100
@@ -45,6 +45,14 @@
 
         """
         gp.add_argument(
+            "--accept-treesum", "-A", action=PatternMatchAction,
+            kind="accept-treesum",
+            dest="fnmatch_filters", metavar="PATTERN",
+            help="""Accept an existing treesum file PATTERN for a directory
+checksum.
+Implicitly this also acts as `--exclude' option.
+Can be given more than once.""")
+        gp.add_argument(
             "--algorithm", "-a", action="store", type=util.argv2algo,
             help="1 (aka sha1), 224, 256 (aka sha256), 384, 512 (aka sha512), "
                  "3 (alias for sha3-512), 3-224, 3-256, 3-384, 3-512, "
@@ -69,7 +77,8 @@
             "--exclude", "-X", action=PatternMatchAction, kind="exclude",
             dest="fnmatch_filters", metavar="PATTERN",
             help="""Exclude names matching the given PATTERN.
-For help on PATTERN use \"help patterns\".""")
+For help on PATTERN use \"help patterns\".
+Can be given more than once.""")
         gp.add_argument(
             "--follow-directory-symlinks", "-l", action=SymlinkAction,
             const="follow-directory-symlinks",
@@ -108,7 +117,8 @@
             "--include", "-I", action=PatternMatchAction, kind="include",
             dest="fnmatch_filters", metavar="PATTERN",
             help="""Include names matching the given PATTERN.
-For help on PATTERN use \"help patterns\".""")
+For help on PATTERN use \"help patterns\".
+Can be given more than once.""")
         gp.add_argument(
             "--logical", "-L", action=SymlinkAction, dest="follow_symlinks",
             const=FollowSymlinkConfig(True, True, True),
@@ -387,9 +397,11 @@
         self.__kind = kwargs.pop("kind", None)
         if self.__kind is None:
             raise argparse.ArgumentError(None, "`kind' is required")
-        if self.__kind not in ("exclude", "include"):
+        if self.__kind not in ("exclude", "include", "accept-treesum"):
             raise argparse.ArgumentError(
-                None, "`kind' must be one of `include' or `exclude'")
+                None,
+                "`kind' must be one of `include', `exclude' or"
+                " `accept-treesum'")
 
         super(PatternMatchAction, self).__init__(*args, **kwargs)
 
@@ -432,7 +444,7 @@
             if f[0] not in ("exclude", "include"):
                 raise ValueError(
                     "every kind of every item in `fnmatch_filters' must be"
-                    " \"include\" or \"exclude\""
+                    " \"include\", \"exclude\" or \"accept-treesum\""
                 )
 
     # Not following symlinks to files is not yet supported: reset to True
@@ -704,6 +716,104 @@
                 self._writer.write_file_digest(self._algorithm[1], opath, None)
             self._writer.flush()
             return (e.errno, None, None, None)
+
+        # Check whether to accept existing treesum digest files
+        if self._fnmatcher:
+            for fso in fsobjects:
+                fpath = join_output_path(top, fso.name)
+                if self._fnmatcher.shall_accept_treesum(fpath):
+                    # Yes we have hit a treesum digest file
+                    logging.debug("Accepting existing treesum from: %s", fpath)
+                    collector = DigestSizeCollector()
+                    try:
+                        collector.collect_from_file(os.path.join(root, fpath))
+                    except OSError as e:
+                        eno = e.errno
+                        emsg = e.strerror
+                    except Exception as e:
+                        # XXX FIXME: other EIO, EBADF, EFAULT
+                        eno = errno.ESRCH
+                        emsg = str(e)
+                    else:
+                        eno = 0
+                        emsg = None
+                    if self._utf8_mode:
+                        fpath = walk.WalkDirEntry.alt_u8(fpath)
+                    else:
+                        fpath = walk.WalkDirEntry.alt_fs(fpath)
+                    opath = join_output_path(top, None)
+                    if self._utf8_mode:
+                        opath = walk.WalkDirEntry.alt_u8(opath)
+                    else:
+                        opath = walk.WalkDirEntry.alt_fs(opath)
+                    if eno == 0:
+                        if self._size_only:
+                            if collector.size is None:
+                                # This is a severe error here
+                                self._writer.write_error(util.b(
+                                    util.interpolate_bytes(
+                                        "No size in treesum-file `%s' while"
+                                        " requiring it",
+                                        fpath),
+                                    "utf-8"))
+                                self._writer.write_size(opath, None)
+                                return (errno.ESRCH, None, None, None)
+                        else:
+                            if self._print_size:
+                                if collector.size is None:
+                                    #
+                                    # XXX FIXME: Is this a **severe** error
+                                    #            here? Currently: no
+                                    #
+                                    self._writer.write_error(util.b(
+                                        util.interpolate_bytes(
+                                            "No size in treesum-file `%s'",
+                                            fpath),
+                                        "utf-8"))
+                                    sz = ""
+                                else:
+                                    sz = collector.size
+                            else:
+                                sz = None
+                            if collector.digest is None:
+                                # This is really a severe error
+                                self._writer.write_error(util.b(
+                                    util.interpolate_bytes(
+                                        "No digest in treesum-file `%s' while"
+                                        " it is required",
+                                        fpath),
+                                    "utf-8"))
+                                self._writer.write_file_digest(
+                                    collector.algorithm or "MD5",
+                                    opath,
+                                    None,
+                                    use_base64=self._use_base64,
+                                    size=sz)
+                                return (errno.ESRCH, None, None, None)
+                        if self._size_only:
+                            self._writer.write_size(opath, collector.size)
+                        else:
+                            self._writer.write_file_digest(
+                                collector.algorithm, opath, collector.digest,
+                                use_base64=self._use_base64, size=sz)
+                        return (0,
+                                collector.algorithm,
+                                collector.digest,
+                                collector.size)
+                    else:
+                        self._writer.write_error(util.interpolate_bytes(
+                            "Cannot read treesum-file `%s' for directory"
+                            "`%s': %s",
+                            fpath,
+                            opath,
+                            util.b(emsg, "utf-8")))
+                        if self._size_only:
+                            self._writer.write_size(opath, None)
+                        else:
+                            self._writer.write_file_digest(
+                                self._algorithm[1], opath, None,
+                                use_base64=self._use_base64, size=None)
+                        return (eno, None, None, None)
         if self._utf8_mode:
             fsobjects.sort(key=walk.WalkDirEntry.sort_key_u8)
         else:
@@ -898,7 +1008,11 @@
                         # Yes -- skipped
                         continue
                     if sub_dir_errno == 0:
-                        dir_size += sub_dir_size
+                        if sub_dir_size is None:
+                            if self._print_size or self._size_only:
+                                dir_tainted = True
+                        else:
+                            dir_size += (sub_dir_size or 0)
                     else:
                         dir_tainted = True
                     dir_dgst.update(util.interpolate_bytes(
@@ -1645,5 +1759,33 @@
         print("    Errors: <none>")
 
 
+class DigestSizeCollector(object):
+    """Callable that records the `algorithm`, `digest` and `size` it gets.
+
+    Each call overwrites what was stored before, so the properties reflect
+    the most recent call; all of them are `None` before the first call.
+
+    """
+
+    def __init__(self):
+        self._algorithm = None
+        self._digest = None
+        self._size = None
+
+    def __call__(self, block_no, tag, generator, fsencoding, flags,
+                 fnmatch_filters, comments, errors,
+                 algorithm, digest, size):
+        """Remember `algorithm`, `digest` and `size`; ignore the rest."""
+        self._algorithm, self._digest, self._size = algorithm, digest, size
+
+    algorithm = property(lambda self: self._algorithm)
+    digest = property(lambda self: self._digest)
+    size = property(lambda self: self._size)
+
+    def collect_from_file(self, digest_file):
+        """Run `get_infos_from_digestfile` on `digest_file` with `self`."""
+        get_infos_from_digestfile([digest_file], self, True)
+
+
 if __name__ == "__main__":
     sys.exit(main())
--- a/cutils/util/fnmatch.py	Fri Mar 07 14:22:22 2025 +0100
+++ b/cutils/util/fnmatch.py	Sat Mar 08 04:49:06 2025 +0100
@@ -15,6 +15,7 @@
 
 import re
 
+from . import PY2
 from . import glob
 
 
@@ -95,8 +96,6 @@
         "fullpath": fullpath_factory,
     }
 
-    VISIT_DEFAULT = True    # Overall default value for visiting
-
     def __init__(self, matchers):
         super(FnMatcher, self).__init__()
         self._matchers = matchers
@@ -106,6 +105,7 @@
         matchers = []
         if filter_definitions:
             for action, kpattern in filter_definitions:
+                assert action in ("include", "exclude", "accept-treesum")
                 kind, sep, pattern = kpattern.partition(':')
                 if not sep:
                     # use the default
@@ -117,19 +117,36 @@
                 matchers.append((action, kind, factory(pattern), pattern))
         return klass(matchers)
 
-    def shall_visit(self, fn, default=None):
-        visit = default if default is not None else self.VISIT_DEFAULT
+    def shall_visit(self, fn, default=True):
+        visit = default  # stays in effect if no pattern matches `fn`
         for action, kind, matcher, orig_pattern in self._matchers:
-            res = matcher(fn)
-            if res:
+            if matcher(fn):  # the last matching pattern decides
                 if action == "include":
                     visit = True
-                elif action == "exclude":
+                elif action in ("exclude", "accept-treesum"):  # no visit
                     visit = False
                 else:
                     raise RuntimeError("unknown action: {}".format(action))
         return visit
 
+    def shall_accept_treesum(self, fn, default=False):
+        """Return True if an "accept-treesum" pattern matches `fn`."""
+        # Without a match the caller-supplied `default` is handed back.
+        accepted = default
+        for action, _kind, match, _pattern in self._matchers:
+            if action not in ("include", "exclude", "accept-treesum"):
+                raise RuntimeError("unknown action: {}".format(action))
+            # "include"/"exclude" entries do not influence the result
+            if action == "accept-treesum" and match(fn):
+                accepted = True
+        return accepted
+
     def definitions(self):
         for action, kind, matcher, orig_pattern in self._matchers:
             yield (action, kind, orig_pattern)
+
+    def __bool__(self):
+        return bool(self._matchers)  # truthy iff `_matchers` is truthy
+
+    if PY2:
+        __nonzero__ = __bool__  # Python 2 consults `__nonzero__` instead