Mercurial > hgrepos > Python > apps > py-cutils
changeset 277:9676ecd32a07
treesum: FIX: Proper error handling.
Many errors are not detected at scandir time but only when a symlink is to be
followed or the content of a file is to be read.
Note also that the size of a file can be available (because its directory
is readable) but the content of the file is not.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Fri, 21 Feb 2025 16:51:03 +0100 |
| parents | f7850ff5cbe0 |
| children | 822cf3a1da22 |
| files | cutils/treesum.py |
| diffstat | 1 files changed, 55 insertions(+), 37 deletions(-) [+] |
line wrap: on
line diff
--- a/cutils/treesum.py Fri Feb 21 16:33:56 2025 +0100 +++ b/cutils/treesum.py Fri Feb 21 16:51:03 2025 +0100 @@ -621,7 +621,7 @@ else: self._writer.write_file_digest(self._algorithm[1], opath, None) self._writer.flush() - return (None, None, None) + return (e.errno, None, None, None) if self._utf8_mode: fsobjects.sort(key=walk.WalkDirEntry.sort_key_u8) else: @@ -799,20 +799,13 @@ # # Get subdir data from recursing into it - sub_dir_algo, sub_dir_dgst, sub_dir_size = self._generate( - root, top + (fso.name, )) + sub_dir_errno, sub_dir_algo, sub_dir_dgst, sub_dir_size = \ + self._generate(root, top + (fso.name, )) - if (sub_dir_algo is None or sub_dir_dgst is None - or sub_dir_size is None): - # - # This should not happen: - # - top-level directories are handled above - # - other filesystem objects should also have been - # handled already - # - assert False - - dir_size += sub_dir_size + if sub_dir_errno == 0: + dir_size += sub_dir_size + else: + dir_tainted = True dir_dgst.update(util.interpolate_bytes( b"1:/,%d:%s,", len(effective_fso_name), @@ -826,10 +819,17 @@ fso.stat.st_mode)) dir_dgst.update(util.interpolate_bytes( b"4:mode,%d:%s,", len(modestr), modestr)) - dir_dgst.update(util.interpolate_bytes( - b"%d:%s,%d:%s,", - len(sub_dir_algo), util.b(sub_dir_algo), - len(sub_dir_dgst), sub_dir_dgst)) + if sub_dir_errno == 0: + dir_dgst.update(util.interpolate_bytes( + b"%d:%s,%d:%s,", + len(sub_dir_algo), util.b(sub_dir_algo), + len(sub_dir_dgst), sub_dir_dgst)) + else: + # NOTE: error message is already printed here + dir_dgst.update(util.interpolate_bytes( + b"5:errno,%d:%s", + len(str(sub_dir_errno)), + util.b(str(sub_dir_errno)))) else: if fso.is_symlink and not self._follow_symlinks.file: # @@ -930,25 +930,44 @@ fso.stat.st_mode)) dir_dgst.update(util.interpolate_bytes( b"4:mode,%d:%s,", len(modestr), modestr)) - if not self._size_only: - dgst = digest.compute_digest_file( - self._algorithm[0], - fso.path, - use_mmap=self._use_mmap) - 
dir_dgst.update(util.interpolate_bytes( - b"%d:%s,%d:%s,", - len(self._algorithm[1]), - util.b(self._algorithm[1]), - len(dgst), - dgst)) if self._size_only: + # + # size can be printed here because .stat is + # available + # self._writer.write_size(opath, fso.stat.st_size) else: - sz = fso.stat.st_size if self._print_size else None - self._writer.write_file_digest( - self._algorithm[1], opath, dgst, - use_base64=self._use_base64, - size=sz) + try: + dgst = digest.compute_digest_file( + self._algorithm[0], + fso.path, + use_mmap=self._use_mmap) + except OSError as e: + dir_tainted = True + self._writer.write_error( + util.interpolate_bytes( + b"`%s': errno %d: %s", + opath, + e.errno, + util.b(e.strerror, "utf-8"))) + sz = (fso.stat.st_size if self._print_size + else None) + self._writer.write_file_digest( + self._algorithm[1], opath, None, + size=sz) + else: + dir_dgst.update(util.interpolate_bytes( + b"%d:%s,%d:%s,", + len(self._algorithm[1]), + util.b(self._algorithm[1]), + len(dgst), + dgst)) + sz = (fso.stat.st_size if self._print_size + else None) + self._writer.write_file_digest( + self._algorithm[1], opath, dgst, + use_base64=self._use_base64, + size=sz) self._writer.flush() opath = join_output_path(top, None) if opath: @@ -962,8 +981,7 @@ # line. Otherwise the "info" command has a problem. # self._writer.write_error(b"directory is tainted") - logging.error("Directory has filename and/or symlink problems: %r", - opath) + logging.error("Directory has problems: %r", opath) if self._size_only: self._writer.write_size(opath, dir_size) else: @@ -972,7 +990,7 @@ self._algorithm[1], opath, dir_dgst.digest(), use_base64=self._use_base64, size=sz) self._writer.flush() - return (self._algorithm[1], dir_dgst.digest(), dir_size) + return (0, self._algorithm[1], dir_dgst.digest(), dir_size) def join_output_path(top, name):
