Mercurial > hgrepos > Python > apps > py-cutils
changeset 308:652870b20f9e
treesum: Implement --accept-treesum: trust a treesum-file for a directory checksum
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Sat, 08 Mar 2025 04:49:06 +0100 |
| parents | 64df94bf4659 |
| children | 553d6f7309d9 |
| files | cutils/treesum.py cutils/util/fnmatch.py |
| diffstat | 2 files changed, 172 insertions(+), 13 deletions(-) [+] |
line wrap: on
line diff
--- a/cutils/treesum.py Fri Mar 07 14:22:22 2025 +0100 +++ b/cutils/treesum.py Sat Mar 08 04:49:06 2025 +0100 @@ -45,6 +45,14 @@ """ gp.add_argument( + "--accept-treesum", "-A", action=PatternMatchAction, + kind="accept-treesum", + dest="fnmatch_filters", metavar="PATTERN", + help="""Accept an existing treesum file PATTERN for a directory +checksum. +Implicitly this also acts as `--exclude' option. +Can be given more than once.""") + gp.add_argument( "--algorithm", "-a", action="store", type=util.argv2algo, help="1 (aka sha1), 224, 256 (aka sha256), 384, 512 (aka sha512), " "3 (alias for sha3-512), 3-224, 3-256, 3-384, 3-512, " @@ -69,7 +77,8 @@ "--exclude", "-X", action=PatternMatchAction, kind="exclude", dest="fnmatch_filters", metavar="PATTERN", help="""Exclude names matching the given PATTERN. -For help on PATTERN use \"help patterns\".""") +For help on PATTERN use \"help patterns\". +Can be given more than once.""") gp.add_argument( "--follow-directory-symlinks", "-l", action=SymlinkAction, const="follow-directory-symlinks", @@ -108,7 +117,8 @@ "--include", "-I", action=PatternMatchAction, kind="include", dest="fnmatch_filters", metavar="PATTERN", help="""Include names matching the given PATTERN. -For help on PATTERN use \"help patterns\".""") +For help on PATTERN use \"help patterns\". +Can be given more than once.""") gp.add_argument( "--logical", "-L", action=SymlinkAction, dest="follow_symlinks", const=FollowSymlinkConfig(True, True, True), @@ -387,9 +397,11 @@ self.__kind = kwargs.pop("kind", None) if self.__kind is None: raise argparse.ArgumentError(None, "`kind' is required") - if self.__kind not in ("exclude", "include"): + if self.__kind not in ("exclude", "include", "accept-treesum"): raise argparse.ArgumentError( - None, "`kind' must be one of `include' or `exclude'") + None, + "`kind' must be one of `include', `exclude' or" + " `accept-treesum'") super(PatternMatchAction, self).__init__(*args, **kwargs) @@ -432,7 +444,7 @@ if f[0] not in ("exclude", "include"): raise ValueError( "every kind of every item in `fnmatch_filters' must be" - " \"include\" or \"exclude\"" + " \"include\", \"exclude\" or \"accept-treesum\"" ) # Not following symlinks to files is not yet supported: reset to True @@ -704,6 +716,104 @@ self._writer.write_file_digest(self._algorithm[1], opath, None) self._writer.flush() return (e.errno, None, None, None) + + # Check whether to accept existing treesum digest files + if self._fnmatcher: + for fso in fsobjects: + fpath = join_output_path(top, fso.name) + if self._fnmatcher.shall_accept_treesum(fpath): + # Yes we have hit a treesum digest file + logging.debug("Accepting existing treesum from: %s", fpath) + collector = DigestSizeCollector() + try: + collector.collect_from_file(os.path.join(root, fpath)) + except OSError as e: + eno = e.errno + emsg = e.strerror + except Exception as e: + # XXX FIXME: other EIO, EBADF, EFAULT + eno = errno.ESRCH + emsg = str(e) + else: + eno = 0 + emsg = None + if self._utf8_mode: + fpath = walk.WalkDirEntry.alt_u8(fpath) + else: + fpath = walk.WalkDirEntry.alt_fs(fpath) + opath = join_output_path(top, None) + if self._utf8_mode: + opath = walk.WalkDirEntry.alt_u8(opath) + else: + opath = walk.WalkDirEntry.alt_fs(opath) + if eno == 0: + if self._size_only: + if collector.size is None: + # This is a severe error here + self._writer.write_error(util.b( + util.interpolate_bytes( + "No size in treesum-file `%s' while" + " requiring it", + fpath), + "utf-8")) + self._writer.write_size(opath, None) + return (errno.ESRCH, None, None, None) + else: + if self._print_size: + if collector.size is None: + # + # XXX FIXME: Is this a **severe** error + # here? Currently: no + # + self._writer.write_error(util.b( + util.interpolate_bytes( + "No size in treesum-file `%s'", + fpath), + "utf-8")) + sz = "" + else: + sz = collector.size + else: + sz = None + if collector.digest is None: + # This is really a severe error + self._writer.write_error(util.b( + util.interpolate_bytes( + "No digest in treesum-file `%s' while" + " it is required", + fpath), + "utf-8")) + self._writer.write_file_digest( + collector.algorithm or "MD5", + opath, + None, + use_base64=self._use_base64, + size=sz) + return (errno.ESRCH, None, None, None) + if self._size_only: + self._writer.write_size(opath, collector.size) + else: + self._writer.write_file_digest( + collector.algorithm, opath, collector.digest, + use_base64=self._use_base64, size=sz) + return (0, + collector.algorithm, + collector.digest, + collector.size) + else: + self._writer.write_error(util.interpolate_bytes( + "Cannot read treesum-file `%s' for directory" + "`%s': %s", + fpath, + opath, + util.b(emsg, "utf-8"))) + if self._size_only: + self._writer.write_size(opath, None) + else: + self._writer.write_file_digest( + self._algorithm[1], opath, None, + use_base64=self._use_base64, size=None) + return (eno, None, None, None) if self._utf8_mode: fsobjects.sort(key=walk.WalkDirEntry.sort_key_u8) else: @@ -898,7 +1008,11 @@ # Yes -- skipped continue if sub_dir_errno == 0: - dir_size += sub_dir_size + if sub_dir_size is None: + if self._print_size or self._size_only: + dir_tainted = True + else: + dir_size += (sub_dir_size or 0) else: dir_tainted = True dir_dgst.update(util.interpolate_bytes( @@ -1645,5 +1759,33 @@ print(" Errors: <none>") +class DigestSizeCollector(object): + + def __init__(self): + self._algorithm = self._digest = self._size = None + + def __call__(self, block_no, tag, generator, fsencoding, flags, + fnmatch_filters, comments, errors, + algorithm, digest, size): + self._algorithm = algorithm + self._digest = digest + self._size = size + + @property + def algorithm(self): + return self._algorithm + + @property + def digest(self): + return self._digest + + @property + def size(self): + return self._size + + def collect_from_file(self, digest_file): + get_infos_from_digestfile([digest_file], self, True) + + if __name__ == "__main__": sys.exit(main())
--- a/cutils/util/fnmatch.py Fri Mar 07 14:22:22 2025 +0100 +++ b/cutils/util/fnmatch.py Sat Mar 08 04:49:06 2025 +0100 @@ -15,6 +15,7 @@ import re +from . import PY2 from . import glob @@ -95,8 +96,6 @@ "fullpath": fullpath_factory, } - VISIT_DEFAULT = True # Overall default value for visiting - def __init__(self, matchers): super(FnMatcher, self).__init__() self._matchers = matchers @@ -106,6 +105,7 @@ matchers = [] if filter_definitions: for action, kpattern in filter_definitions: + assert action in ("include", "exclude", "accept-treesum") kind, sep, pattern = kpattern.partition(':') if not sep: # use the default @@ -117,19 +117,36 @@ matchers.append((action, kind, factory(pattern), pattern)) return klass(matchers) - def shall_visit(self, fn, default=None): - visit = default if default is not None else self.VISIT_DEFAULT + def shall_visit(self, fn, default=True): + visit = default for action, kind, matcher, orig_pattern in self._matchers: - res = matcher(fn) - if res: + if matcher(fn): if action == "include": visit = True - elif action == "exclude": + elif action in ("exclude", "accept-treesum"): visit = False else: raise RuntimeError("unknown action: {}".format(action)) return visit + def shall_accept_treesum(self, fn, default=False): + accept = default + for action, kind, matcher, orig_pattern in self._matchers: + if action == "accept-treesum": + if matcher(fn): + accept = True + elif action in ("include", "exclude"): + pass + else: + raise RuntimeError("unknown action: {}".format(action)) + return accept + def definitions(self): for action, kind, matcher, orig_pattern in self._matchers: yield (action, kind, orig_pattern) + + def __bool__(self): + return bool(self._matchers) + + if PY2: + __nonzero__ = __bool__
