comparison cutils/treesum.py @ 277:9676ecd32a07

treesum: FIX: Proper error handling. Many errors are not detected at scandir time but only later, when a symlink is to be followed or the content of a file is to be read. Note also that the size of a file can be available (because its directory is readable) while the content of the file is not.
author Franz Glasner <fzglas.hg@dom66.de>
date Fri, 21 Feb 2025 16:51:03 +0100
parents c72f5b2dbc6f
children 822cf3a1da22
comparison
equal deleted inserted replaced
276:f7850ff5cbe0 277:9676ecd32a07
619 if self._size_only: 619 if self._size_only:
620 self._writer.write_size(opath, None) 620 self._writer.write_size(opath, None)
621 else: 621 else:
622 self._writer.write_file_digest(self._algorithm[1], opath, None) 622 self._writer.write_file_digest(self._algorithm[1], opath, None)
623 self._writer.flush() 623 self._writer.flush()
624 return (None, None, None) 624 return (e.errno, None, None, None)
625 if self._utf8_mode: 625 if self._utf8_mode:
626 fsobjects.sort(key=walk.WalkDirEntry.sort_key_u8) 626 fsobjects.sort(key=walk.WalkDirEntry.sort_key_u8)
627 else: 627 else:
628 fsobjects.sort(key=walk.WalkDirEntry.sort_key_fs) 628 fsobjects.sort(key=walk.WalkDirEntry.sort_key_fs)
629 dir_dgst = self._algorithm[0]() 629 dir_dgst = self._algorithm[0]()
797 # 797 #
798 # Follow the symlink to dir or handle a "real" directory 798 # Follow the symlink to dir or handle a "real" directory
799 # 799 #
800 800
801 # Get subdir data from recursing into it 801 # Get subdir data from recursing into it
802 sub_dir_algo, sub_dir_dgst, sub_dir_size = self._generate( 802 sub_dir_errno, sub_dir_algo, sub_dir_dgst, sub_dir_size = \
803 root, top + (fso.name, )) 803 self._generate(root, top + (fso.name, ))
804 804
805 if (sub_dir_algo is None or sub_dir_dgst is None 805 if sub_dir_errno == 0:
806 or sub_dir_size is None): 806 dir_size += sub_dir_size
807 # 807 else:
808 # This should not happen: 808 dir_tainted = True
809 # - top-level directories are handled above
810 # - other filesystem objects should also have been
811 # handled already
812 #
813 assert False
814
815 dir_size += sub_dir_size
816 dir_dgst.update(util.interpolate_bytes( 809 dir_dgst.update(util.interpolate_bytes(
817 b"1:/,%d:%s,", 810 b"1:/,%d:%s,",
818 len(effective_fso_name), 811 len(effective_fso_name),
819 effective_fso_name)) 812 effective_fso_name))
820 if self._with_metadata_full_mode: 813 if self._with_metadata_full_mode:
824 elif self._with_metadata_mode: 817 elif self._with_metadata_mode:
825 modestr = util.b(normalized_compatible_mode_str( 818 modestr = util.b(normalized_compatible_mode_str(
826 fso.stat.st_mode)) 819 fso.stat.st_mode))
827 dir_dgst.update(util.interpolate_bytes( 820 dir_dgst.update(util.interpolate_bytes(
828 b"4:mode,%d:%s,", len(modestr), modestr)) 821 b"4:mode,%d:%s,", len(modestr), modestr))
829 dir_dgst.update(util.interpolate_bytes( 822 if sub_dir_errno == 0:
830 b"%d:%s,%d:%s,", 823 dir_dgst.update(util.interpolate_bytes(
831 len(sub_dir_algo), util.b(sub_dir_algo), 824 b"%d:%s,%d:%s,",
832 len(sub_dir_dgst), sub_dir_dgst)) 825 len(sub_dir_algo), util.b(sub_dir_algo),
826 len(sub_dir_dgst), sub_dir_dgst))
827 else:
828 # NOTE: error message is already printed here
829 dir_dgst.update(util.interpolate_bytes(
830 b"5:errno,%d:%s",
831 len(str(sub_dir_errno)),
832 util.b(str(sub_dir_errno))))
833 else: 833 else:
834 if fso.is_symlink and not self._follow_symlinks.file: 834 if fso.is_symlink and not self._follow_symlinks.file:
835 # 835 #
836 # Symbolic link to some filesystem object which is not 836 # Symbolic link to some filesystem object which is not
837 # determined to be a link to a directory or some other 837 # determined to be a link to a directory or some other
928 elif self._with_metadata_mode: 928 elif self._with_metadata_mode:
929 modestr = util.b(normalized_compatible_mode_str( 929 modestr = util.b(normalized_compatible_mode_str(
930 fso.stat.st_mode)) 930 fso.stat.st_mode))
931 dir_dgst.update(util.interpolate_bytes( 931 dir_dgst.update(util.interpolate_bytes(
932 b"4:mode,%d:%s,", len(modestr), modestr)) 932 b"4:mode,%d:%s,", len(modestr), modestr))
933 if not self._size_only:
934 dgst = digest.compute_digest_file(
935 self._algorithm[0],
936 fso.path,
937 use_mmap=self._use_mmap)
938 dir_dgst.update(util.interpolate_bytes(
939 b"%d:%s,%d:%s,",
940 len(self._algorithm[1]),
941 util.b(self._algorithm[1]),
942 len(dgst),
943 dgst))
944 if self._size_only: 933 if self._size_only:
934 #
935 # size can be printed here because .stat is
936 # available
937 #
945 self._writer.write_size(opath, fso.stat.st_size) 938 self._writer.write_size(opath, fso.stat.st_size)
946 else: 939 else:
947 sz = fso.stat.st_size if self._print_size else None 940 try:
948 self._writer.write_file_digest( 941 dgst = digest.compute_digest_file(
949 self._algorithm[1], opath, dgst, 942 self._algorithm[0],
950 use_base64=self._use_base64, 943 fso.path,
951 size=sz) 944 use_mmap=self._use_mmap)
945 except OSError as e:
946 dir_tainted = True
947 self._writer.write_error(
948 util.interpolate_bytes(
949 b"`%s': errno %d: %s",
950 opath,
951 e.errno,
952 util.b(e.strerror, "utf-8")))
953 sz = (fso.stat.st_size if self._print_size
954 else None)
955 self._writer.write_file_digest(
956 self._algorithm[1], opath, None,
957 size=sz)
958 else:
959 dir_dgst.update(util.interpolate_bytes(
960 b"%d:%s,%d:%s,",
961 len(self._algorithm[1]),
962 util.b(self._algorithm[1]),
963 len(dgst),
964 dgst))
965 sz = (fso.stat.st_size if self._print_size
966 else None)
967 self._writer.write_file_digest(
968 self._algorithm[1], opath, dgst,
969 use_base64=self._use_base64,
970 size=sz)
952 self._writer.flush() 971 self._writer.flush()
953 opath = join_output_path(top, None) 972 opath = join_output_path(top, None)
954 if opath: 973 if opath:
955 if self._utf8_mode: 974 if self._utf8_mode:
956 opath = walk.WalkDirEntry.alt_u8(opath) 975 opath = walk.WalkDirEntry.alt_u8(opath)
960 # 979 #
961 # IMPORTANT: Print errors BEFORE the associated digest or size 980 # IMPORTANT: Print errors BEFORE the associated digest or size
962 # line. Otherwise the "info" command has a problem. 981 # line. Otherwise the "info" command has a problem.
963 # 982 #
964 self._writer.write_error(b"directory is tainted") 983 self._writer.write_error(b"directory is tainted")
965 logging.error("Directory has filename and/or symlink problems: %r", 984 logging.error("Directory has problems: %r", opath)
966 opath)
967 if self._size_only: 985 if self._size_only:
968 self._writer.write_size(opath, dir_size) 986 self._writer.write_size(opath, dir_size)
969 else: 987 else:
970 sz = dir_size if self._print_size else None 988 sz = dir_size if self._print_size else None
971 self._writer.write_file_digest( 989 self._writer.write_file_digest(
972 self._algorithm[1], opath, dir_dgst.digest(), 990 self._algorithm[1], opath, dir_dgst.digest(),
973 use_base64=self._use_base64, size=sz) 991 use_base64=self._use_base64, size=sz)
974 self._writer.flush() 992 self._writer.flush()
975 return (self._algorithm[1], dir_dgst.digest(), dir_size) 993 return (0, self._algorithm[1], dir_dgst.digest(), dir_size)
976 994
977 995
978 def join_output_path(top, name): 996 def join_output_path(top, name):
979 if name is None: 997 if name is None:
980 # a path for a directory is to be computed 998 # a path for a directory is to be computed