Mercurial > hgrepos > Python > apps > py-cutils
comparison cutils/treesum.py @ 308:652870b20f9e
treesum: Implement --accept-treesum: trust a treesum-file for a directory checksum
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Sat, 08 Mar 2025 04:49:06 +0100 |
| parents | 64df94bf4659 |
| children | dbad01eb9d03 |
comparison
equal
deleted
inserted
replaced
| 307:64df94bf4659 | 308:652870b20f9e |
|---|---|
| 43 This is because :class:`argparse.ArgumentParser` does not | 43 This is because :class:`argparse.ArgumentParser` does not |
| 44 support them for all supported Python versions. | 44 support them for all supported Python versions. |
| 45 | 45 |
| 46 """ | 46 """ |
| 47 gp.add_argument( | 47 gp.add_argument( |
| 48 "--accept-treesum", "-A", action=PatternMatchAction, | |
| 49 kind="accept-treesum", | |
| 50 dest="fnmatch_filters", metavar="PATTERN", | |
| 51 help="""Accept an existing treesum file PATTERN for a directory | |
| 52 checksum. | |
| 53 Implicitly this also acts as `--exclude' option. | |
| 54 Can be given more than once.""") | |
| 55 gp.add_argument( | |
| 48 "--algorithm", "-a", action="store", type=util.argv2algo, | 56 "--algorithm", "-a", action="store", type=util.argv2algo, |
| 49 help="1 (aka sha1), 224, 256 (aka sha256), 384, 512 (aka sha512), " | 57 help="1 (aka sha1), 224, 256 (aka sha256), 384, 512 (aka sha512), " |
| 50 "3 (alias for sha3-512), 3-224, 3-256, 3-384, 3-512, " | 58 "3 (alias for sha3-512), 3-224, 3-256, 3-384, 3-512, " |
| 51 "blake2b, blake2b-256, blake2s, " | 59 "blake2b, blake2b-256, blake2s, " |
| 52 "blake2 (alias for blake2b), " | 60 "blake2 (alias for blake2b), " |
| 67 "Can be given more than once.") | 75 "Can be given more than once.") |
| 68 gp.add_argument( | 76 gp.add_argument( |
| 69 "--exclude", "-X", action=PatternMatchAction, kind="exclude", | 77 "--exclude", "-X", action=PatternMatchAction, kind="exclude", |
| 70 dest="fnmatch_filters", metavar="PATTERN", | 78 dest="fnmatch_filters", metavar="PATTERN", |
| 71 help="""Exclude names matching the given PATTERN. | 79 help="""Exclude names matching the given PATTERN. |
| 72 For help on PATTERN use \"help patterns\".""") | 80 For help on PATTERN use \"help patterns\". |
| 81 Can be given more than once.""") | |
| 73 gp.add_argument( | 82 gp.add_argument( |
| 74 "--follow-directory-symlinks", "-l", action=SymlinkAction, | 83 "--follow-directory-symlinks", "-l", action=SymlinkAction, |
| 75 const="follow-directory-symlinks", | 84 const="follow-directory-symlinks", |
| 76 default=FollowSymlinkConfig(False, False, True), | 85 default=FollowSymlinkConfig(False, False, True), |
| 77 dest="follow_symlinks", | 86 dest="follow_symlinks", |
| 106 --no-follow-file-symlinks, et al.).""") | 115 --no-follow-file-symlinks, et al.).""") |
| 107 gp.add_argument( | 116 gp.add_argument( |
| 108 "--include", "-I", action=PatternMatchAction, kind="include", | 117 "--include", "-I", action=PatternMatchAction, kind="include", |
| 109 dest="fnmatch_filters", metavar="PATTERN", | 118 dest="fnmatch_filters", metavar="PATTERN", |
| 110 help="""Include names matching the given PATTERN. | 119 help="""Include names matching the given PATTERN. |
| 111 For help on PATTERN use \"help patterns\".""") | 120 For help on PATTERN use \"help patterns\". |
| 121 Can be given more than once.""") | |
| 112 gp.add_argument( | 122 gp.add_argument( |
| 113 "--logical", "-L", action=SymlinkAction, dest="follow_symlinks", | 123 "--logical", "-L", action=SymlinkAction, dest="follow_symlinks", |
| 114 const=FollowSymlinkConfig(True, True, True), | 124 const=FollowSymlinkConfig(True, True, True), |
| 115 help="""Follow symbolic links everywhere: on command line | 125 help="""Follow symbolic links everywhere: on command line |
| 116 arguments and -- while walking -- directory and file symbolic links. | 126 arguments and -- while walking -- directory and file symbolic links. |
| 385 kwargs["nargs"] = 1 | 395 kwargs["nargs"] = 1 |
| 386 | 396 |
| 387 self.__kind = kwargs.pop("kind", None) | 397 self.__kind = kwargs.pop("kind", None) |
| 388 if self.__kind is None: | 398 if self.__kind is None: |
| 389 raise argparse.ArgumentError(None, "`kind' is required") | 399 raise argparse.ArgumentError(None, "`kind' is required") |
| 390 if self.__kind not in ("exclude", "include"): | 400 if self.__kind not in ("exclude", "include", "accept-treesum"): |
| 391 raise argparse.ArgumentError( | 401 raise argparse.ArgumentError( |
| 392 None, "`kind' must be one of `include' or `exclude'") | 402 None, |
| 403 "`kind' must be one of `include', `exclude' or" | |
| 404 " `accept-treesum'") | |
| 393 | 405 |
| 394 super(PatternMatchAction, self).__init__(*args, **kwargs) | 406 super(PatternMatchAction, self).__init__(*args, **kwargs) |
| 395 | 407 |
| 396 def __call__(self, parser, namespace, values, option_string=None): | 408 def __call__(self, parser, namespace, values, option_string=None): |
| 397 items = getattr(namespace, self.dest, None) | 409 items = getattr(namespace, self.dest, None) |
| 430 raise TypeError( | 442 raise TypeError( |
| 431 "items in `fnmatch_filters' must be tuples or lists") | 443 "items in `fnmatch_filters' must be tuples or lists") |
| 432 if f[0] not in ("exclude", "include"): | 444 if f[0] not in ("exclude", "include"): |
| 433 raise ValueError( | 445 raise ValueError( |
| 434 "every kind of every item in `fnmatch_filters' must be" | 446 "every kind of every item in `fnmatch_filters' must be" |
| 435 " \"include\" or \"exclude\"" | 447 " \"include\", \"exclude\" or \"accept-treesum\"" |
| 436 ) | 448 ) |
| 437 | 449 |
| 438 # Not following symlinks to files is not yet supported: reset to True | 450 # Not following symlinks to files is not yet supported: reset to True |
| 439 # if not follow_symlinks.file: | 451 # if not follow_symlinks.file: |
| 440 # follow_symlinks = follow_symlinks._make([follow_symlinks.command_line, | 452 # follow_symlinks = follow_symlinks._make([follow_symlinks.command_line, |
| 702 self._writer.write_size(opath, None) | 714 self._writer.write_size(opath, None) |
| 703 else: | 715 else: |
| 704 self._writer.write_file_digest(self._algorithm[1], opath, None) | 716 self._writer.write_file_digest(self._algorithm[1], opath, None) |
| 705 self._writer.flush() | 717 self._writer.flush() |
| 706 return (e.errno, None, None, None) | 718 return (e.errno, None, None, None) |
| 719 | |
| 720 # Check whether to accept existing treesum digest files | |
| 721 if self._fnmatcher: | |
| 722 for fso in fsobjects: | |
| 723 fpath = join_output_path(top, fso.name) | |
| 724 if self._fnmatcher.shall_accept_treesum(fpath): | |
| 725 # Yes we have hit a treesum digest file | |
| 726 logging.debug("Accepting existing treesum from: %s", fpath) | |
| 727 collector = DigestSizeCollector() | |
| 728 try: | |
| 729 collector.collect_from_file(os.path.join(root, fpath)) | |
| 730 except OSError as e: | |
| 731 eno = e.errno | |
| 732 emsg = e.strerror | |
| 733 except Exception as e: | |
| 734 # XXX FIXME: other EIO, EBADF, EFAULT | |
| 735 eno = errno.ESRCH | |
| 736 emsg = str(e) | |
| 737 else: | |
| 738 eno = 0 | |
| 739 emsg = None | |
| 740 if self._utf8_mode: | |
| 741 fpath = walk.WalkDirEntry.alt_u8(fpath) | |
| 742 else: | |
| 743 fpath = walk.WalkDirEntry.alt_fs(fpath) | |
| 744 opath = join_output_path(top, None) | |
| 745 if self._utf8_mode: | |
| 746 opath = walk.WalkDirEntry.alt_u8(opath) | |
| 747 else: | |
| 748 opath = walk.WalkDirEntry.alt_fs(opath) | |
| 749 if eno == 0: | |
| 750 if self._size_only: | |
| 751 if collector.size is None: | |
| 752 # This is a severe error here | |
| 753 self._writer.write_error(util.b( | |
| 754 util.interpolate_bytes( | |
| 755 "No size in treesum-file `%s' while" | |
| 756 " requiring it", | |
| 757 fpath), | |
| 758 "utf-8")) | |
| 759 self._writer.write_size(opath, None) | |
| 760 return (errno.ESRCH, None, None, None) | |
| 761 else: | |
| 762 if self._print_size: | |
| 763 if collector.size is None: | |
| 764 # | |
| 765 # XXX FIXME: Is this a **severe** error | |
| 766 # here? Currently: no | |
| 767 # | |
| 768 self._writer.write_error(util.b( | |
| 769 util.interpolate_bytes( | |
| 770 "No size in treesum-file `%s'", | |
| 771 fpath), | |
| 772 "utf-8")) | |
| 773 sz = "" | |
| 774 else: | |
| 775 sz = collector.size | |
| 776 else: | |
| 777 sz = None | |
| 778 if collector.digest is None: | |
| 779 # This is really a severe error | |
| 780 self._writer.write_error(util.b( | |
| 781 util.interpolate_bytes( | |
| 782 "No digest in treesum-file `%s' while" | |
| 783 " it is required", | |
| 784 fpath), | |
| 785 "utf-8")) | |
| 786 self._writer.write_file_digest( | |
| 787 collector.algorithm or "MD5", | |
| 788 opath, | |
| 789 None, | |
| 790 use_base64=self._use_base64, | |
| 791 size=sz) | |
| 792 return (errno.ESRCH, None, None, None) | |
| 793 if self._size_only: | |
| 794 self._writer.write_size(opath, collector.size) | |
| 795 else: | |
| 796 self._writer.write_file_digest( | |
| 797 collector.algorithm, opath, collector.digest, | |
| 798 use_base64=self._use_base64, size=sz) | |
| 799 return (0, | |
| 800 collector.algorithm, | |
| 801 collector.digest, | |
| 802 collector.size) | |
| 803 else: | |
| 804 self._writer.write_error(util.interpolate_bytes( | |
| 805 "Cannot read treesum-file `%s' for directory" | |
| 806 "`%s': %s", | |
| 807 fpath, | |
| 808 opath, | |
| 809 util.b(emsg, "utf-8"))) | |
| 810 if self._size_only: | |
| 811 self._writer.write_size(opath, None) | |
| 812 else: | |
| 813 self._writer.write_file_digest( | |
| 814 self._algorithm[1], opath, None, | |
| 815 use_base64=self._use_base64, size=None) | |
| 816 return (eno, None, None, None) | |
| 707 if self._utf8_mode: | 817 if self._utf8_mode: |
| 708 fsobjects.sort(key=walk.WalkDirEntry.sort_key_u8) | 818 fsobjects.sort(key=walk.WalkDirEntry.sort_key_u8) |
| 709 else: | 819 else: |
| 710 fsobjects.sort(key=walk.WalkDirEntry.sort_key_fs) | 820 fsobjects.sort(key=walk.WalkDirEntry.sort_key_fs) |
| 711 dir_dgst = self._algorithm[0]() | 821 dir_dgst = self._algorithm[0]() |
| 896 # | 1006 # |
| 897 if sub_dir_errno is None: | 1007 if sub_dir_errno is None: |
| 898 # Yes -- skipped | 1008 # Yes -- skipped |
| 899 continue | 1009 continue |
| 900 if sub_dir_errno == 0: | 1010 if sub_dir_errno == 0: |
| 901 dir_size += sub_dir_size | 1011 if sub_dir_size is None: |
| 1012 if self._print_size or self._size_only: | |
| 1013 dir_tainted = True | |
| 1014 else: | |
| 1015 dir_size += (sub_dir_size or 0) | |
| 902 else: | 1016 else: |
| 903 dir_tainted = True | 1017 dir_tainted = True |
| 904 dir_dgst.update(util.interpolate_bytes( | 1018 dir_dgst.update(util.interpolate_bytes( |
| 905 b"1:/,%d:%s,", | 1019 b"1:/,%d:%s,", |
| 906 len(effective_fso_name), | 1020 len(effective_fso_name), |
| 1643 print(" ", err) | 1757 print(" ", err) |
| 1644 else: | 1758 else: |
| 1645 print(" Errors: <none>") | 1759 print(" Errors: <none>") |
| 1646 | 1760 |
| 1647 | 1761 |
class DigestSizeCollector(object):
    """Callback object that remembers the algorithm, digest and size
    reported for a treesum digest file.

    An instance is handed as callback to :func:`get_infos_from_digestfile`
    (see :meth:`collect_from_file`).  Every invocation of the instance
    overwrites the stored values, so the properties always expose the
    values of the most recent invocation.

    All three properties are `None` as long as nothing has been collected.

    """

    def __init__(self):
        self._reset()

    def _reset(self):
        """Forget everything that has been collected so far."""
        self._algorithm = self._digest = self._size = None

    def __call__(self, block_no, tag, generator, fsencoding, flags,
                 fnmatch_filters, comments, errors,
                 algorithm, digest, size):
        """Store `algorithm`, `digest` and `size`.

        The other parameters are part of the callback signature but are
        not used by this collector.

        """
        # NOTE(review): presumably called once per block of the digest
        # file, so the last block wins -- confirm against
        # get_infos_from_digestfile().
        self._algorithm = algorithm
        self._digest = digest
        self._size = size

    @property
    def algorithm(self):
        """The collected algorithm or `None`"""
        return self._algorithm

    @property
    def digest(self):
        """The collected digest or `None`"""
        return self._digest

    @property
    def size(self):
        """The collected size or `None`"""
        return self._size

    def collect_from_file(self, digest_file):
        """Collect algorithm, digest and size from file `digest_file`.

        Previously collected values are dropped first: a reused collector
        must not report stale data from an earlier file if `digest_file`
        yields no values or if reading it fails.

        Exceptions raised by :func:`get_infos_from_digestfile` are
        propagated unchanged to the caller.

        """
        self._reset()
        get_infos_from_digestfile([digest_file], self, True)
| 1788 | |
| 1789 | |
| 1648 if __name__ == "__main__": | 1790 if __name__ == "__main__": |
| 1649 sys.exit(main()) | 1791 sys.exit(main()) |
