comparison cutils/treesum.py @ 308:652870b20f9e

treesum: Implement --accept-treesum: trust a treesum-file for a directory checksum
author Franz Glasner <fzglas.hg@dom66.de>
date Sat, 08 Mar 2025 04:49:06 +0100
parents 64df94bf4659
children dbad01eb9d03
comparison
equal deleted inserted replaced
307:64df94bf4659 308:652870b20f9e
43 This is because :class:`argparse.ArgumentParser` does not 43 This is because :class:`argparse.ArgumentParser` does not
44 support them for all supported Python versions. 44 support them for all supported Python versions.
45 45
46 """ 46 """
47 gp.add_argument( 47 gp.add_argument(
48 "--accept-treesum", "-A", action=PatternMatchAction,
49 kind="accept-treesum",
50 dest="fnmatch_filters", metavar="PATTERN",
51 help="""Accept an existing treesum file PATTERN for a directory
52 checksum.
53 Implicitly this also acts as `--exclude' option.
54 Can be given more than once.""")
55 gp.add_argument(
48 "--algorithm", "-a", action="store", type=util.argv2algo, 56 "--algorithm", "-a", action="store", type=util.argv2algo,
49 help="1 (aka sha1), 224, 256 (aka sha256), 384, 512 (aka sha512), " 57 help="1 (aka sha1), 224, 256 (aka sha256), 384, 512 (aka sha512), "
50 "3 (alias for sha3-512), 3-224, 3-256, 3-384, 3-512, " 58 "3 (alias for sha3-512), 3-224, 3-256, 3-384, 3-512, "
51 "blake2b, blake2b-256, blake2s, " 59 "blake2b, blake2b-256, blake2s, "
52 "blake2 (alias for blake2b), " 60 "blake2 (alias for blake2b), "
67 "Can be given more than once.") 75 "Can be given more than once.")
68 gp.add_argument( 76 gp.add_argument(
69 "--exclude", "-X", action=PatternMatchAction, kind="exclude", 77 "--exclude", "-X", action=PatternMatchAction, kind="exclude",
70 dest="fnmatch_filters", metavar="PATTERN", 78 dest="fnmatch_filters", metavar="PATTERN",
71 help="""Exclude names matching the given PATTERN. 79 help="""Exclude names matching the given PATTERN.
72 For help on PATTERN use \"help patterns\".""") 80 For help on PATTERN use \"help patterns\".
81 Can be given more than once.""")
73 gp.add_argument( 82 gp.add_argument(
74 "--follow-directory-symlinks", "-l", action=SymlinkAction, 83 "--follow-directory-symlinks", "-l", action=SymlinkAction,
75 const="follow-directory-symlinks", 84 const="follow-directory-symlinks",
76 default=FollowSymlinkConfig(False, False, True), 85 default=FollowSymlinkConfig(False, False, True),
77 dest="follow_symlinks", 86 dest="follow_symlinks",
106 --no-follow-file-symlinks, et al.).""") 115 --no-follow-file-symlinks, et al.).""")
107 gp.add_argument( 116 gp.add_argument(
108 "--include", "-I", action=PatternMatchAction, kind="include", 117 "--include", "-I", action=PatternMatchAction, kind="include",
109 dest="fnmatch_filters", metavar="PATTERN", 118 dest="fnmatch_filters", metavar="PATTERN",
110 help="""Include names matching the given PATTERN. 119 help="""Include names matching the given PATTERN.
111 For help on PATTERN use \"help patterns\".""") 120 For help on PATTERN use \"help patterns\".
121 Can be given more than once.""")
112 gp.add_argument( 122 gp.add_argument(
113 "--logical", "-L", action=SymlinkAction, dest="follow_symlinks", 123 "--logical", "-L", action=SymlinkAction, dest="follow_symlinks",
114 const=FollowSymlinkConfig(True, True, True), 124 const=FollowSymlinkConfig(True, True, True),
115 help="""Follow symbolic links everywhere: on command line 125 help="""Follow symbolic links everywhere: on command line
116 arguments and -- while walking -- directory and file symbolic links. 126 arguments and -- while walking -- directory and file symbolic links.
385 kwargs["nargs"] = 1 395 kwargs["nargs"] = 1
386 396
387 self.__kind = kwargs.pop("kind", None) 397 self.__kind = kwargs.pop("kind", None)
388 if self.__kind is None: 398 if self.__kind is None:
389 raise argparse.ArgumentError(None, "`kind' is required") 399 raise argparse.ArgumentError(None, "`kind' is required")
390 if self.__kind not in ("exclude", "include"): 400 if self.__kind not in ("exclude", "include", "accept-treesum"):
391 raise argparse.ArgumentError( 401 raise argparse.ArgumentError(
392 None, "`kind' must be one of `include' or `exclude'") 402 None,
403 "`kind' must be one of `include', `exclude' or"
404 " `accept-treesum'")
393 405
394 super(PatternMatchAction, self).__init__(*args, **kwargs) 406 super(PatternMatchAction, self).__init__(*args, **kwargs)
395 407
396 def __call__(self, parser, namespace, values, option_string=None): 408 def __call__(self, parser, namespace, values, option_string=None):
397 items = getattr(namespace, self.dest, None) 409 items = getattr(namespace, self.dest, None)
430 raise TypeError( 442 raise TypeError(
431 "items in `fnmatch_filters' must be tuples or lists") 443 "items in `fnmatch_filters' must be tuples or lists")
432 if f[0] not in ("exclude", "include"): 444 if f[0] not in ("exclude", "include"):
433 raise ValueError( 445 raise ValueError(
434 "every kind of every item in `fnmatch_filters' must be" 446 "every kind of every item in `fnmatch_filters' must be"
435 " \"include\" or \"exclude\"" 447 " \"include\", \"exclude\" or \"accept-treesum\""
436 ) 448 )
437 449
438 # Not following symlinks to files is not yet supported: reset to True 450 # Not following symlinks to files is not yet supported: reset to True
439 # if not follow_symlinks.file: 451 # if not follow_symlinks.file:
440 # follow_symlinks = follow_symlinks._make([follow_symlinks.command_line, 452 # follow_symlinks = follow_symlinks._make([follow_symlinks.command_line,
702 self._writer.write_size(opath, None) 714 self._writer.write_size(opath, None)
703 else: 715 else:
704 self._writer.write_file_digest(self._algorithm[1], opath, None) 716 self._writer.write_file_digest(self._algorithm[1], opath, None)
705 self._writer.flush() 717 self._writer.flush()
706 return (e.errno, None, None, None) 718 return (e.errno, None, None, None)
719
720 # Check whether to accept existing treesum digest files
721 if self._fnmatcher:
722 for fso in fsobjects:
723 fpath = join_output_path(top, fso.name)
724 if self._fnmatcher.shall_accept_treesum(fpath):
725 # Yes we have hit a treesum digest file
726 logging.debug("Accepting existing treesum from: %s", fpath)
727 collector = DigestSizeCollector()
728 try:
729 collector.collect_from_file(os.path.join(root, fpath))
730 except OSError as e:
731 eno = e.errno
732 emsg = e.strerror
733 except Exception as e:
734 # XXX FIXME: other EIO, EBADF, EFAULT
735 eno = errno.ESRCH
736 emsg = str(e)
737 else:
738 eno = 0
739 emsg = None
740 if self._utf8_mode:
741 fpath = walk.WalkDirEntry.alt_u8(fpath)
742 else:
743 fpath = walk.WalkDirEntry.alt_fs(fpath)
744 opath = join_output_path(top, None)
745 if self._utf8_mode:
746 opath = walk.WalkDirEntry.alt_u8(opath)
747 else:
748 opath = walk.WalkDirEntry.alt_fs(opath)
749 if eno == 0:
750 if self._size_only:
751 if collector.size is None:
752 # This is a severe error here
753 self._writer.write_error(util.b(
754 util.interpolate_bytes(
755 "No size in treesum-file `%s' while"
756 " requiring it",
757 fpath),
758 "utf-8"))
759 self._writer.write_size(opath, None)
760 return (errno.ESRCH, None, None, None)
761 else:
762 if self._print_size:
763 if collector.size is None:
764 #
765 # XXX FIXME: Is this a **severe** error
766 # here? Currently: no
767 #
768 self._writer.write_error(util.b(
769 util.interpolate_bytes(
770 "No size in treesum-file `%s'",
771 fpath),
772 "utf-8"))
773 sz = ""
774 else:
775 sz = collector.size
776 else:
777 sz = None
778 if collector.digest is None:
779 # This is really a severe error
780 self._writer.write_error(util.b(
781 util.interpolate_bytes(
782 "No digest in treesum-file `%s' while"
783 " it is required",
784 fpath),
785 "utf-8"))
786 self._writer.write_file_digest(
787 collector.algorithm or "MD5",
788 opath,
789 None,
790 use_base64=self._use_base64,
791 size=sz)
792 return (errno.ESRCH, None, None, None)
793 if self._size_only:
794 self._writer.write_size(opath, collector.size)
795 else:
796 self._writer.write_file_digest(
797 collector.algorithm, opath, collector.digest,
798 use_base64=self._use_base64, size=sz)
799 return (0,
800 collector.algorithm,
801 collector.digest,
802 collector.size)
803 else:
804 self._writer.write_error(util.interpolate_bytes(
805 "Cannot read treesum-file `%s' for directory"
806 "`%s': %s",
807 fpath,
808 opath,
809 util.b(emsg, "utf-8")))
810 if self._size_only:
811 self._writer.write_size(opath, None)
812 else:
813 self._writer.write_file_digest(
814 self._algorithm[1], opath, None,
815 use_base64=self._use_base64, size=None)
816 return (eno, None, None, None)
707 if self._utf8_mode: 817 if self._utf8_mode:
708 fsobjects.sort(key=walk.WalkDirEntry.sort_key_u8) 818 fsobjects.sort(key=walk.WalkDirEntry.sort_key_u8)
709 else: 819 else:
710 fsobjects.sort(key=walk.WalkDirEntry.sort_key_fs) 820 fsobjects.sort(key=walk.WalkDirEntry.sort_key_fs)
711 dir_dgst = self._algorithm[0]() 821 dir_dgst = self._algorithm[0]()
896 # 1006 #
897 if sub_dir_errno is None: 1007 if sub_dir_errno is None:
898 # Yes -- skipped 1008 # Yes -- skipped
899 continue 1009 continue
900 if sub_dir_errno == 0: 1010 if sub_dir_errno == 0:
901 dir_size += sub_dir_size 1011 if sub_dir_size is None:
1012 if self._print_size or self._size_only:
1013 dir_tainted = True
1014 else:
1015 dir_size += (sub_dir_size or 0)
902 else: 1016 else:
903 dir_tainted = True 1017 dir_tainted = True
904 dir_dgst.update(util.interpolate_bytes( 1018 dir_dgst.update(util.interpolate_bytes(
905 b"1:/,%d:%s,", 1019 b"1:/,%d:%s,",
906 len(effective_fso_name), 1020 len(effective_fso_name),
1643 print(" ", err) 1757 print(" ", err)
1644 else: 1758 else:
1645 print(" Errors: <none>") 1759 print(" Errors: <none>")
1646 1760
1647 1761
class DigestSizeCollector(object):
    """Callable that remembers the algorithm, digest and size it is given.

    An instance is handed to :func:`get_infos_from_digestfile` as its
    callback (see :meth:`collect_from_file`).  Every invocation overwrites
    the previously stored values, so after reading a file the properties
    reflect the most recent invocation only.

    All three properties are `None` until the instance has been called.

    """

    def __init__(self):
        self._algorithm = None
        self._digest = None
        self._size = None

    def collect_from_file(self, digest_file):
        """Read `digest_file` and record its algorithm, digest and size.

        The work is delegated to :func:`get_infos_from_digestfile` with
        this object as callback.  Exceptions raised while reading are not
        handled here and propagate to the caller.

        NOTE(review): the meaning of the trailing `True` argument is
        defined by :func:`get_infos_from_digestfile` -- confirm there.

        """
        get_infos_from_digestfile([digest_file], self, True)

    def __call__(self, block_no, tag, generator, fsencoding, flags,
                 fnmatch_filters, comments, errors,
                 algorithm, digest, size):
        """Store `algorithm`, `digest` and `size`.

        All other arguments are accepted only to satisfy the callback
        signature; they are ignored.

        """
        self._algorithm, self._digest, self._size = algorithm, digest, size

    @property
    def algorithm(self):
        """The algorithm from the last call or `None`"""
        return self._algorithm

    @property
    def digest(self):
        """The digest from the last call or `None`"""
        return self._digest

    @property
    def size(self):
        """The size from the last call or `None`"""
        return self._size
1788
1789
1648 if __name__ == "__main__": 1790 if __name__ == "__main__":
1649 sys.exit(main()) 1791 sys.exit(main())