Mercurial > hgrepos > Python > apps > py-cutils
comparison cutils/treesum.py @ 277:9676ecd32a07
treesum: FIX: Proper error handling.
Many errors are not detected at scandir time but only when a symlink is
followed or the content of a file is read.
Note also that the size of a file can be available (because its directory
is readable) but the content of the file is not.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Fri, 21 Feb 2025 16:51:03 +0100 |
| parents | c72f5b2dbc6f |
| children | 822cf3a1da22 |
comparison
equal
deleted
inserted
replaced
| 276:f7850ff5cbe0 | 277:9676ecd32a07 |
|---|---|
| 619 if self._size_only: | 619 if self._size_only: |
| 620 self._writer.write_size(opath, None) | 620 self._writer.write_size(opath, None) |
| 621 else: | 621 else: |
| 622 self._writer.write_file_digest(self._algorithm[1], opath, None) | 622 self._writer.write_file_digest(self._algorithm[1], opath, None) |
| 623 self._writer.flush() | 623 self._writer.flush() |
| 624 return (None, None, None) | 624 return (e.errno, None, None, None) |
| 625 if self._utf8_mode: | 625 if self._utf8_mode: |
| 626 fsobjects.sort(key=walk.WalkDirEntry.sort_key_u8) | 626 fsobjects.sort(key=walk.WalkDirEntry.sort_key_u8) |
| 627 else: | 627 else: |
| 628 fsobjects.sort(key=walk.WalkDirEntry.sort_key_fs) | 628 fsobjects.sort(key=walk.WalkDirEntry.sort_key_fs) |
| 629 dir_dgst = self._algorithm[0]() | 629 dir_dgst = self._algorithm[0]() |
| 797 # | 797 # |
| 798 # Follow the symlink to dir or handle a "real" directory | 798 # Follow the symlink to dir or handle a "real" directory |
| 799 # | 799 # |
| 800 | 800 |
| 801 # Get subdir data from recursing into it | 801 # Get subdir data from recursing into it |
| 802 sub_dir_algo, sub_dir_dgst, sub_dir_size = self._generate( | 802 sub_dir_errno, sub_dir_algo, sub_dir_dgst, sub_dir_size = \ |
| 803 root, top + (fso.name, )) | 803 self._generate(root, top + (fso.name, )) |
| 804 | 804 |
| 805 if (sub_dir_algo is None or sub_dir_dgst is None | 805 if sub_dir_errno == 0: |
| 806 or sub_dir_size is None): | 806 dir_size += sub_dir_size |
| 807 # | 807 else: |
| 808 # This should not happen: | 808 dir_tainted = True |
| 809 # - top-level directories are handled above | |
| 810 # - other filesystem objects should also have been | |
| 811 # handled already | |
| 812 # | |
| 813 assert False | |
| 814 | |
| 815 dir_size += sub_dir_size | |
| 816 dir_dgst.update(util.interpolate_bytes( | 809 dir_dgst.update(util.interpolate_bytes( |
| 817 b"1:/,%d:%s,", | 810 b"1:/,%d:%s,", |
| 818 len(effective_fso_name), | 811 len(effective_fso_name), |
| 819 effective_fso_name)) | 812 effective_fso_name)) |
| 820 if self._with_metadata_full_mode: | 813 if self._with_metadata_full_mode: |
| 824 elif self._with_metadata_mode: | 817 elif self._with_metadata_mode: |
| 825 modestr = util.b(normalized_compatible_mode_str( | 818 modestr = util.b(normalized_compatible_mode_str( |
| 826 fso.stat.st_mode)) | 819 fso.stat.st_mode)) |
| 827 dir_dgst.update(util.interpolate_bytes( | 820 dir_dgst.update(util.interpolate_bytes( |
| 828 b"4:mode,%d:%s,", len(modestr), modestr)) | 821 b"4:mode,%d:%s,", len(modestr), modestr)) |
| 829 dir_dgst.update(util.interpolate_bytes( | 822 if sub_dir_errno == 0: |
| 830 b"%d:%s,%d:%s,", | 823 dir_dgst.update(util.interpolate_bytes( |
| 831 len(sub_dir_algo), util.b(sub_dir_algo), | 824 b"%d:%s,%d:%s,", |
| 832 len(sub_dir_dgst), sub_dir_dgst)) | 825 len(sub_dir_algo), util.b(sub_dir_algo), |
| 826 len(sub_dir_dgst), sub_dir_dgst)) | |
| 827 else: | |
| 828 # NOTE: error message is already printed here | |
| 829 dir_dgst.update(util.interpolate_bytes( | |
| 830 b"5:errno,%d:%s", | |
| 831 len(str(sub_dir_errno)), | |
| 832 util.b(str(sub_dir_errno)))) | |
| 833 else: | 833 else: |
| 834 if fso.is_symlink and not self._follow_symlinks.file: | 834 if fso.is_symlink and not self._follow_symlinks.file: |
| 835 # | 835 # |
| 836 # Symbolic link to some filesystem object which is not | 836 # Symbolic link to some filesystem object which is not |
| 837 # determined to be a link to a directory or some other | 837 # determined to be a link to a directory or some other |
| 928 elif self._with_metadata_mode: | 928 elif self._with_metadata_mode: |
| 929 modestr = util.b(normalized_compatible_mode_str( | 929 modestr = util.b(normalized_compatible_mode_str( |
| 930 fso.stat.st_mode)) | 930 fso.stat.st_mode)) |
| 931 dir_dgst.update(util.interpolate_bytes( | 931 dir_dgst.update(util.interpolate_bytes( |
| 932 b"4:mode,%d:%s,", len(modestr), modestr)) | 932 b"4:mode,%d:%s,", len(modestr), modestr)) |
| 933 if not self._size_only: | |
| 934 dgst = digest.compute_digest_file( | |
| 935 self._algorithm[0], | |
| 936 fso.path, | |
| 937 use_mmap=self._use_mmap) | |
| 938 dir_dgst.update(util.interpolate_bytes( | |
| 939 b"%d:%s,%d:%s,", | |
| 940 len(self._algorithm[1]), | |
| 941 util.b(self._algorithm[1]), | |
| 942 len(dgst), | |
| 943 dgst)) | |
| 944 if self._size_only: | 933 if self._size_only: |
| 934 # | |
| 935 # size can be printed here because .stat is | |
| 936 # available | |
| 937 # | |
| 945 self._writer.write_size(opath, fso.stat.st_size) | 938 self._writer.write_size(opath, fso.stat.st_size) |
| 946 else: | 939 else: |
| 947 sz = fso.stat.st_size if self._print_size else None | 940 try: |
| 948 self._writer.write_file_digest( | 941 dgst = digest.compute_digest_file( |
| 949 self._algorithm[1], opath, dgst, | 942 self._algorithm[0], |
| 950 use_base64=self._use_base64, | 943 fso.path, |
| 951 size=sz) | 944 use_mmap=self._use_mmap) |
| 945 except OSError as e: | |
| 946 dir_tainted = True | |
| 947 self._writer.write_error( | |
| 948 util.interpolate_bytes( | |
| 949 b"`%s': errno %d: %s", | |
| 950 opath, | |
| 951 e.errno, | |
| 952 util.b(e.strerror, "utf-8"))) | |
| 953 sz = (fso.stat.st_size if self._print_size | |
| 954 else None) | |
| 955 self._writer.write_file_digest( | |
| 956 self._algorithm[1], opath, None, | |
| 957 size=sz) | |
| 958 else: | |
| 959 dir_dgst.update(util.interpolate_bytes( | |
| 960 b"%d:%s,%d:%s,", | |
| 961 len(self._algorithm[1]), | |
| 962 util.b(self._algorithm[1]), | |
| 963 len(dgst), | |
| 964 dgst)) | |
| 965 sz = (fso.stat.st_size if self._print_size | |
| 966 else None) | |
| 967 self._writer.write_file_digest( | |
| 968 self._algorithm[1], opath, dgst, | |
| 969 use_base64=self._use_base64, | |
| 970 size=sz) | |
| 952 self._writer.flush() | 971 self._writer.flush() |
| 953 opath = join_output_path(top, None) | 972 opath = join_output_path(top, None) |
| 954 if opath: | 973 if opath: |
| 955 if self._utf8_mode: | 974 if self._utf8_mode: |
| 956 opath = walk.WalkDirEntry.alt_u8(opath) | 975 opath = walk.WalkDirEntry.alt_u8(opath) |
| 960 # | 979 # |
| 961 # IMPORTANT: Print errors BEFORE the associated digest or size | 980 # IMPORTANT: Print errors BEFORE the associated digest or size |
| 962 # line. Otherwise the "info" command has a problem. | 981 # line. Otherwise the "info" command has a problem. |
| 963 # | 982 # |
| 964 self._writer.write_error(b"directory is tainted") | 983 self._writer.write_error(b"directory is tainted") |
| 965 logging.error("Directory has filename and/or symlink problems: %r", | 984 logging.error("Directory has problems: %r", opath) |
| 966 opath) | |
| 967 if self._size_only: | 985 if self._size_only: |
| 968 self._writer.write_size(opath, dir_size) | 986 self._writer.write_size(opath, dir_size) |
| 969 else: | 987 else: |
| 970 sz = dir_size if self._print_size else None | 988 sz = dir_size if self._print_size else None |
| 971 self._writer.write_file_digest( | 989 self._writer.write_file_digest( |
| 972 self._algorithm[1], opath, dir_dgst.digest(), | 990 self._algorithm[1], opath, dir_dgst.digest(), |
| 973 use_base64=self._use_base64, size=sz) | 991 use_base64=self._use_base64, size=sz) |
| 974 self._writer.flush() | 992 self._writer.flush() |
| 975 return (self._algorithm[1], dir_dgst.digest(), dir_size) | 993 return (0, self._algorithm[1], dir_dgst.digest(), dir_size) |
| 976 | 994 |
| 977 | 995 |
| 978 def join_output_path(top, name): | 996 def join_output_path(top, name): |
| 979 if name is None: | 997 if name is None: |
| 980 # a path for a directory is to be computed | 998 # a path for a directory is to be computed |
