# HG changeset patch # User Franz Glasner # Date 1739878744 -3600 # Node ID 0add8276e6b8482f9e3e6740d984f113438c5449 # Parent 188f448ab5e918a04cff89a09f3586a0b021c3e4 treesum: Handle errors like broken symlinks properly diff -r 188f448ab5e9 -r 0add8276e6b8 cutils/treesum.py --- a/cutils/treesum.py Mon Feb 17 00:12:33 2025 +0100 +++ b/cutils/treesum.py Tue Feb 18 12:39:04 2025 +0100 @@ -18,6 +18,7 @@ import binascii import collections import datetime +import errno import logging import os import re @@ -576,8 +577,41 @@ def _generate(self, root, top): logging.debug("Handling %s/%r", root, top) path = os.path.join(root, *top) if top else root - with walk.ScanDir(path) as dirscan: - fsobjects = list(dirscan) + try: + with walk.ScanDir(path) as dirscan: + fsobjects = list(dirscan) + except OSError as e: + if self._utf8_mode: + opath = walk.WalkDirEntry.alt_u8(path) + else: + opath = walk.WalkDirEntry.alt_fs(path) + if e.errno == errno.ENOTDIR: + # object exists but is not a directory + errmsg = b"not a directory" + elif e.errno in (errno.EACCES, errno.EPERM, + getattr(errno, "ENOTCAPABLE", errno.EACCES)): + # no permissions + errmsg = ( + b"access denied / no permissions / missing capabilities") + elif e.errno == errno.ENOENT: + # given object does not exist + errmsg = b"no such file or directory" + else: + raise + self._writer.write_error(util.interpolate_bytes( + b"`%s': %s", opath, errmsg)) + opath = join_output_path(top, None) + if opath: + if self._utf8_mode: + opath = walk.WalkDirEntry.alt_u8(opath) + else: + opath = walk.WalkDirEntry.alt_fs(opath) + if self._size_only: + self._writer.write_size(opath, None) + else: + self._writer.write_file_digest(self._algorithm[1], opath, None) + self._writer.flush() + return (None, None) if self._utf8_mode: fsobjects.sort(key=walk.WalkDirEntry.sort_key_u8) else: @@ -666,6 +700,15 @@ sub_dir_dgst, sub_dir_size = self._generate( root, top + (fso.name, )) + if sub_dir_dgst is None or sub_dir_size is None: + # + # This should not happen: + # - top-level directories are handled above + # - other filesystem objects should also have been + # handled already + # + assert False + dir_size += sub_dir_size if self._utf8_mode: if fso.u8name is None: @@ -794,42 +837,71 @@ else: dir_dgst.update(util.interpolate_bytes( b"1:f,%d:%s,", len(fso.fsname), fso.fsname)) - dir_size += fso.stat.st_size - if self._with_metadata_mtime: - mtime = datetime.datetime.utcfromtimestamp( - int(fso.stat.st_mtime)) - mtime = util.b(mtime.isoformat("T") + "Z") - dir_dgst.update(util.interpolate_bytes( - b"5:mtime,%d:%s,", len(mtime), mtime)) - if self._with_metadata_full_mode: - modestr = util.b(normalized_mode_str(fso.stat.st_mode)) - dir_dgst.update(util.interpolate_bytes( - b"8:fullmode,%d:%s,", len(modestr), modestr)) - elif self._with_metadata_mode: - modestr = util.b(normalized_compatible_mode_str( - fso.stat.st_mode)) - dir_dgst.update(util.interpolate_bytes( - b"4:mode,%d:%s,", len(modestr), modestr)) - if not self._size_only: - dgst = digest.compute_digest_file( - self._algorithm[0], - fso.path, - use_mmap=self._use_mmap) - dir_dgst.update(util.interpolate_bytes( - b"%d:%s,", len(dgst), dgst)) opath = join_output_path(top, fso.name) if self._utf8_mode: opath = walk.WalkDirEntry.alt_u8(opath) else: opath = walk.WalkDirEntry.alt_fs(opath) - if self._size_only: - self._writer.write_size(opath, fso.stat.st_size) + if fso.stat is None: + # + # Error: most likely a broken symlink here + # + dir_tainted = True + dir_dgst.update(util.interpolate_bytes( + b"5:errno,%d:%s,", + len(str(fso.stat_errno)), + util.b(str(fso.stat_errno)))) + self._writer.write_error(util.interpolate_bytes( + b"errno %d: %s", + fso.stat_errno, + util.b(fso.stat_errstr, "utf-8"))) + logging.error( + "Directory entry has symlink problems: %r", + opath) + if self._size_only: + self._writer.write_size(opath, None) + else: + self._writer.write_file_digest( + self._algorithm[1], opath, None) else: - sz = fso.stat.st_size if self._print_size else None - self._writer.write_file_digest( - self._algorithm[1], opath, dgst, - use_base64=self._use_base64, - size=sz) + # + # Ok: File has normal stat info + # + # XXX FIXME: Handle special files (fifo, socket, + # block or char devices, ...). + # + dir_size += fso.stat.st_size + if self._with_metadata_mtime: + mtime = datetime.datetime.utcfromtimestamp( + int(fso.stat.st_mtime)) + mtime = util.b(mtime.isoformat("T") + "Z") + dir_dgst.update(util.interpolate_bytes( + b"5:mtime,%d:%s,", len(mtime), mtime)) + if self._with_metadata_full_mode: + modestr = util.b( + normalized_mode_str(fso.stat.st_mode)) + dir_dgst.update(util.interpolate_bytes( + b"8:fullmode,%d:%s,", len(modestr), modestr)) + elif self._with_metadata_mode: + modestr = util.b(normalized_compatible_mode_str( + fso.stat.st_mode)) + dir_dgst.update(util.interpolate_bytes( + b"4:mode,%d:%s,", len(modestr), modestr)) + if not self._size_only: + dgst = digest.compute_digest_file( + self._algorithm[0], + fso.path, + use_mmap=self._use_mmap) + dir_dgst.update(util.interpolate_bytes( + b"%d:%s,", len(dgst), dgst)) + if self._size_only: + self._writer.write_size(opath, fso.stat.st_size) + else: + sz = fso.stat.st_size if self._print_size else None + self._writer.write_file_digest( + self._algorithm[1], opath, dgst, + use_base64=self._use_base64, + size=sz) self._writer.flush() opath = join_output_path(top, None) if opath: @@ -837,16 +909,17 @@ opath = walk.WalkDirEntry.alt_u8(opath) else: opath = walk.WalkDirEntry.alt_fs(opath) + if dir_tainted: + # + # IMPORTANT: Print errors BEFORE the associated digest or size + # line. Otherwise the "info" command has a problem. + # + self._writer.write_error(b"directory is tainted") + logging.error("Directory has filename and/or symlink problems: %r", + opath) if self._size_only: self._writer.write_size(opath, dir_size) else: - if dir_tainted: - # - # IMPORTANT: Print errors BEFORE the associated digest line. - # Otherwise the "info" command has a problem. - # - self._writer.write_error(b"directory is tainted") - logging.error("Directory has filename problems: %r", opath) sz = dir_size if self._print_size else None self._writer.write_file_digest( self._algorithm[1], opath, dir_dgst.digest(), @@ -1014,26 +1087,32 @@ assert isinstance(filename, bytes) self.write(b"SIZE (") self.write(filename) - self.write(b") = ") - self.writeln(util.b(str(sz))) + self.write(b")") + if sz is not None: + self.write(b" = ") + self.write(util.b(str(sz))) + self.writeln(b"") def write_file_digest(self, algorithm, filename, digest, use_base64=False, size=None): - digest = (base64.b64encode(digest) - if use_base64 - else binascii.hexlify(digest)) + if digest is not None: + digest = (base64.b64encode(digest) + if use_base64 + else binascii.hexlify(digest)) if filename != b"./@/": filename = util.normalize_filename(filename, True) self.write(util.b(algorithm)) self.write(b" (") self.write(filename) - self.write(b") = ") - self.write(digest) - if size is not None: - self.write(b",") - self.writeln(util.b(str(size))) - else: - self.writeln(b"") + self.write(b")") + if digest is not None or size is not None: + self.write(b" = ") + if digest is not None: + self.write(digest) + if size is not None: + self.write(b",") + self.write(util.b(str(size))) + self.writeln(b"") def finish(self): """Finish a block and write the current CRC""" @@ -1076,8 +1155,8 @@ PATTERN0 = re.compile(br"\A[ \t]*\r?\n\Z") # empty lines PATTERN1 = re.compile(br"\A(VERSION|FSENCODING|FLAGS|TIMESTAMP|ISOTIMESTAMP|CRC32)[ \t]*=[ \t]*([^ \t]+)[ \t]*\r?\n\Z") # noqa: E501 line too long PATTERN2 = re.compile(br"\A(ROOT|COMMENT|ERROR|GENERATOR)[ \t]*\((.*)\)[ \t]*\r?\n\Z") # noqa: E501 line too long - PATTERN3 = re.compile(br"\ASIZE[ \t]*\((.*)\)[ \t]*=[ \t]*(\d+)[ \t]*\r?\n\Z") # noqa: E501 line too long - PATTERN4 = re.compile(br"\A([A-Za-z0-9_-]+)[ \t]*\((.*)\)[ \t]*=[ \t]*([A-Za-z0-9=+/]+)(,(\d+))?[ \t]*\r?\n\Z") # noqa: E501 line too long + PATTERN3 = re.compile(br"\ASIZE[ \t]*\((.*)\)([ \t]*=[ \t]*(\d+))?[ \t]*\r?\n\Z") # noqa: E501 line too long + PATTERN4 = re.compile(br"\A([A-Za-z0-9_-]+)[ \t]*\((.*)\)([ \t]*=[ \t]*([A-Za-z0-9=+/]+)(,(\d+))?)?[ \t]*\r?\n\Z") # noqa: E501 line too long def __init__(self, _fp, _filename, _own_fp): self._fp = _fp @@ -1211,24 +1290,31 @@ mo = self.PATTERN3.search(line) if mo: self._update_crc(line) - return ("SIZE", mo.group(1), int(util.n(mo.group(2)), 10)) + if mo.group(2): + return ("SIZE", mo.group(1), + int(util.n(mo.group(3)), 10)) + else: + return ("SIZE", mo.group(1), None) else: mo = self.PATTERN4.search(line) if mo: self._update_crc(line) algo_name = util.n(mo.group(1)) - if (len(mo.group(3)) == - 2 * self._get_digest_size(algo_name)): - # hex - digest = binascii.unhexlify(mo.group(3)) + if mo.group(3): + if (len(mo.group(4)) == + 2 * self._get_digest_size(algo_name)): + # hex + digest = binascii.unhexlify(mo.group(4)) + else: + # base64 + digest = base64.b64decode(mo.group(4)) + if mo.group(5): + size = int(util.n(mo.group(6)), 10) + else: + size = None + return (algo_name, mo.group(2), digest, size) else: - # base64 - digest = base64.b64decode(mo.group(3)) - if mo.group(4): - size = int(util.n(mo.group(5)), 10) - else: - size = None - return (algo_name, mo.group(2), digest, size) + return (algo_name, mo.group(2), None, None) else: assert False, line return line diff -r 188f448ab5e9 -r 0add8276e6b8 cutils/util/walk.py --- a/cutils/util/walk.py Mon Feb 17 00:12:33 2025 +0100 +++ b/cutils/util/walk.py Tue Feb 18 12:39:04 2025 +0100 @@ -53,7 +53,8 @@ """ __slots__ = ("_name", "_path", # encoded as given in the ctor - "_is_symlink", "_is_dir", "_stat_result", + "_is_symlink", "_is_dir", "_stat_result", "_stat_errno", + "_stat_errstr", "_alt_fsname", "_alt_u8name") def __init__(self, name, path): @@ -61,7 +62,8 @@ """The name exactly as given in the ctor""" self._path = _unix_path(path) """The path as given in the ctor -- but normalized to have slashes""" - self._is_symlink = self._is_dir = self._stat_result = None + self._is_symlink = self._is_dir = self._stat_result = \ + self._stat_errno = self._stat_errstr = None self._alt_fsname = self._alt_u8name = _notset @property @@ -232,6 +234,14 @@ def stat(self): return self._stat_result + @property + def stat_errno(self): + return self._stat_errno + + @property + def stat_errstr(self): + return self._stat_errstr + def __repr__(self): tag = "" if self._is_symlink: @@ -261,8 +271,13 @@ # is not a symbolic link, same behaviour than os.path.islink(). # w._is_symlink = False - # Do not supress errors here and (consistently) follow symlinks - w._stat_result = entry.stat(follow_symlinks=True) + # Consistently follow symlinks + try: + w._stat_result = entry.stat(follow_symlinks=True) + except OSError as e: + w._stat_result = None + w._stat_errno = e.errno + w._stat_errstr = e.strerror return w @classmethod @@ -286,7 +301,12 @@ # w._is_symlink = False if _do_stat: - w._stat_result = os.stat(w._path) + try: + w._stat_result = os.stat(w._path) + except OSError as e: + w._stat_result = None + w._stat_errno = e.errno + w._stat_errstr = e.strerror return w @classmethod