changeset 277:9676ecd32a07

treesum: FIX: Proper error handling. Many errors are not detected at scandir time but only when a symlink is to be followed or the content of a file is to be read. Note also that the size of a file can be available (because its directory is readable) even though the content of the file is not.
author Franz Glasner <fzglas.hg@dom66.de>
date Fri, 21 Feb 2025 16:51:03 +0100
parents f7850ff5cbe0
children 822cf3a1da22
files cutils/treesum.py
diffstat 1 files changed, 55 insertions(+), 37 deletions(-) [+]
line wrap: on
line diff
--- a/cutils/treesum.py	Fri Feb 21 16:33:56 2025 +0100
+++ b/cutils/treesum.py	Fri Feb 21 16:51:03 2025 +0100
@@ -621,7 +621,7 @@
             else:
                 self._writer.write_file_digest(self._algorithm[1], opath, None)
             self._writer.flush()
-            return (None, None, None)
+            return (e.errno, None, None, None)
         if self._utf8_mode:
             fsobjects.sort(key=walk.WalkDirEntry.sort_key_u8)
         else:
@@ -799,20 +799,13 @@
                     #
 
                     # Get subdir data from recursing into it
-                    sub_dir_algo, sub_dir_dgst, sub_dir_size = self._generate(
-                        root, top + (fso.name, ))
+                    sub_dir_errno, sub_dir_algo, sub_dir_dgst, sub_dir_size = \
+                            self._generate(root, top + (fso.name, ))
 
-                    if (sub_dir_algo is None or sub_dir_dgst is None
-                            or sub_dir_size is None):
-                        #
-                        # This should not happen:
-                        # - top-level directories are handled above
-                        # - other filesystem objects should also have been
-                        #   handled already
-                        #
-                        assert False
-
-                    dir_size += sub_dir_size
+                    if sub_dir_errno == 0:
+                        dir_size += sub_dir_size
+                    else:
+                        dir_tainted = True
                     dir_dgst.update(util.interpolate_bytes(
                         b"1:/,%d:%s,",
                         len(effective_fso_name),
@@ -826,10 +819,17 @@
                             fso.stat.st_mode))
                         dir_dgst.update(util.interpolate_bytes(
                             b"4:mode,%d:%s,", len(modestr), modestr))
-                    dir_dgst.update(util.interpolate_bytes(
-                        b"%d:%s,%d:%s,",
-                        len(sub_dir_algo), util.b(sub_dir_algo),
-                        len(sub_dir_dgst), sub_dir_dgst))
+                    if sub_dir_errno == 0:
+                        dir_dgst.update(util.interpolate_bytes(
+                            b"%d:%s,%d:%s,",
+                            len(sub_dir_algo), util.b(sub_dir_algo),
+                            len(sub_dir_dgst), sub_dir_dgst))
+                    else:
+                        # NOTE: error message is already printed here
+                        dir_dgst.update(util.interpolate_bytes(
+                            b"5:errno,%d:%s",
+                            len(str(sub_dir_errno)),
+                            util.b(str(sub_dir_errno))))
             else:
                 if fso.is_symlink and not self._follow_symlinks.file:
                     #
@@ -930,25 +930,44 @@
                                 fso.stat.st_mode))
                             dir_dgst.update(util.interpolate_bytes(
                                 b"4:mode,%d:%s,", len(modestr), modestr))
-                        if not self._size_only:
-                            dgst = digest.compute_digest_file(
-                                self._algorithm[0],
-                                fso.path,
-                                use_mmap=self._use_mmap)
-                            dir_dgst.update(util.interpolate_bytes(
-                                b"%d:%s,%d:%s,",
-                                len(self._algorithm[1]),
-                                util.b(self._algorithm[1]),
-                                len(dgst),
-                                dgst))
                         if self._size_only:
+                            #
+                            # size can be printed here because .stat is
+                            # available
+                            #
                             self._writer.write_size(opath, fso.stat.st_size)
                         else:
-                            sz = fso.stat.st_size if self._print_size else None
-                            self._writer.write_file_digest(
-                                self._algorithm[1], opath, dgst,
-                                use_base64=self._use_base64,
-                                size=sz)
+                            try:
+                                dgst = digest.compute_digest_file(
+                                    self._algorithm[0],
+                                    fso.path,
+                                    use_mmap=self._use_mmap)
+                            except OSError as e:
+                                dir_tainted = True
+                                self._writer.write_error(
+                                    util.interpolate_bytes(
+                                        b"`%s': errno %d: %s",
+                                        opath,
+                                        e.errno,
+                                        util.b(e.strerror, "utf-8")))
+                                sz = (fso.stat.st_size if self._print_size
+                                      else None)
+                                self._writer.write_file_digest(
+                                    self._algorithm[1], opath, None,
+                                    size=sz)
+                            else:
+                                dir_dgst.update(util.interpolate_bytes(
+                                    b"%d:%s,%d:%s,",
+                                    len(self._algorithm[1]),
+                                    util.b(self._algorithm[1]),
+                                    len(dgst),
+                                    dgst))
+                                sz = (fso.stat.st_size if self._print_size
+                                      else None)
+                                self._writer.write_file_digest(
+                                    self._algorithm[1], opath, dgst,
+                                    use_base64=self._use_base64,
+                                    size=sz)
             self._writer.flush()
         opath = join_output_path(top, None)
         if opath:
@@ -962,8 +981,7 @@
             #            line. Otherwise the "info" command has a problem.
             #
             self._writer.write_error(b"directory is tainted")
-            logging.error("Directory has filename and/or symlink problems: %r",
-                          opath)
+            logging.error("Directory has problems: %r", opath)
         if self._size_only:
             self._writer.write_size(opath, dir_size)
         else:
@@ -972,7 +990,7 @@
                 self._algorithm[1], opath, dir_dgst.digest(),
                 use_base64=self._use_base64, size=sz)
         self._writer.flush()
-        return (self._algorithm[1], dir_dgst.digest(), dir_size)
+        return (0, self._algorithm[1], dir_dgst.digest(), dir_size)
 
 
 def join_output_path(top, name):