Mercurial > hgrepos > Python > apps > py-cutils
diff cutils/treesum.py @ 204:07f1d79e6674
Fully implemented UTF-8 mode for treeview.
While doing this, refactored the "normal" mode (the mode that uses the filesystem encoding).
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Tue, 21 Jan 2025 20:31:48 +0100 |
| parents | b9b38584919b |
| children | 63088d3675bb |
line wrap: on
line diff
--- a/cutils/treesum.py Tue Jan 21 18:57:02 2025 +0100 +++ b/cutils/treesum.py Tue Jan 21 20:31:48 2025 +0100 @@ -406,9 +406,14 @@ if self._minimal is not None: self._outfp.write(format_bsd_line( - "ROOT", None, self._minimal if self._minimal else "", False)) + "ROOT", + None, + (walk.WalkDirEntry.alt_u8(self._minimal) + if self._minimal else b""), + False)) else: - self._outfp.write(format_bsd_line("ROOT", None, root, False)) + self._outfp.write(format_bsd_line( + "ROOT", None, walk.WalkDirEntry.alt_u8(root), False)) self._outfp.flush() if not self._handle_root_logical and os.path.islink(root): @@ -457,6 +462,7 @@ fsobjects.sort(key=walk.WalkDirEntry.sort_key) dir_dgst = self._algorithm[0]() dir_size = 0 + dir_tainted = False for fso in fsobjects: if fso.is_dir: if fso.is_symlink and not self._follow_directory_symlinks: @@ -464,11 +470,48 @@ os.readlink(fso.path)) # linktgt = util.fsencode(os.readlink(fso.path))) linkdgst = self._algorithm[0]() - linkdgst.update( - util.interpolate_bytes( - b"%d:%s,", len(linktgt.fspath), linktgt.fspath)) - dir_dgst.update(util.interpolate_bytes( - b"1:S,%d:%s,", len(fso.fsname), fso.fsname)) + if self._utf8_mode: + if linktgt.u8path is None: + dir_tainted = True + linkdgst.update(util.interpolate_bytes( + b"%d:%s,", + len(linktgt.alt_u8path), + linktgt.alt_u8path)) + else: + linkdgst.update(util.interpolate_bytes( + b"%d:%s,", + len(linktgt.u8path), + linktgt.u8path)) + if fso.u8name is None: + dir_tainted = True + dir_dgst.update(util.interpolate_bytes( + b"1:S,%d:%s,", + len(fso.alt_u8name), + fso.alt_u8name)) + else: + dir_dgst.update(util.interpolate_bytes( + b"1:S,%d:%s,", len(fso.u8name), fso.u8name)) + else: + if linktgt.fspath is None: + dir_tainted = True + linkdgst.update(util.interpolate_bytes( + b"%d:%s,", + len(linktgt.alt_fspath), + linktgt.alt_fspath)) + else: + linkdgst.update(util.interpolate_bytes( + b"%d:%s,", + len(linktgt.fspath), + linktgt.fspath)) + if fso.fsname is None: + dir_tainted = True + 
dir_dgst.update(util.interpolate_bytes( + b"1:S,%d:%s,", + len(fso.alt_fsname), + fso.alt_fsname)) + else: + dir_dgst.update(util.interpolate_bytes( + b"1:S,%d:%s,", len(fso.fsname), fso.fsname)) # # - no mtime and no mode for symlinks # - also does not count for dir_size @@ -476,7 +519,11 @@ dir_dgst.update(util.interpolate_bytes( b"%d:%s,", len(linkdgst.digest()), linkdgst.digest())) - opath = "/".join(top) + "/" + fso.name if top else fso.name + opath = join_output_path(top, fso.name) + if self._utf8_mode: + opath = walk.WalkDirEntry.alt_u8(opath) + else: + opath = walk.WalkDirEntry.alt_fs(opath) if self._size_only: self._outfp.write(format_bsd_line( "SIZE", None, "%s/./@/" % (opath,), False, 0)) @@ -497,8 +544,26 @@ root, top + (fso.name, )) dir_size += sub_dir_size - dir_dgst.update(util.interpolate_bytes( - b"1:d,%d:%s,", len(fso.fsname), fso.fsname)) + if self._utf8_mode: + if fso.u8name is None: + dir_tainted = True + dir_dgst.update(util.interpolate_bytes( + b"1:d,%d:%s,", + len(fso.alt_u8name), + fso.alt_u8name)) + else: + dir_dgst.update(util.interpolate_bytes( + b"1:d,%d:%s,", len(fso.u8name), fso.u8name)) + else: + if fso.fsname is None: + dir_tainted = True + dir_dgst.update(util.interpolate_bytes( + b"1:d,%d:%s,", + len(fso.alt_fsname), + fso.alt_fsname)) + else: + dir_dgst.update(util.interpolate_bytes( + b"1:d,%d:%s,", len(fso.fsname), fso.fsname)) dir_dgst.update(util.interpolate_bytes( b"%d:%s,", len(sub_dir_dgst), sub_dir_dgst)) if self._with_metadata_full_mode: @@ -511,8 +576,26 @@ dir_dgst.update(util.interpolate_bytes( b"4:mode,%d:%s,", len(modestr), modestr)) else: - dir_dgst.update(util.interpolate_bytes( - b"1:f,%d:%s,", len(fso.fsname), fso.fsname)) + if self._utf8_mode: + if fso.u8name is None: + dir_tainted = True + dir_dgst.update(util.interpolate_bytes( + b"1:f,%d:%s,", + len(fso.alt_u8name), + fso.alt_u8name)) + else: + dir_dgst.update(util.interpolate_bytes( + b"1:f,%d:%s,", len(fso.u8name), fso.u8name)) + else: + if fso.fsname is 
None: + dir_tainted = True + dir_dgst.update(util.interpolate_bytes( + b"1:f,%d:%s,", + len(fso.alt_fsname), + fso.alt_fsname)) + else: + dir_dgst.update(util.interpolate_bytes( + b"1:f,%d:%s,", len(fso.fsname), fso.fsname)) dir_size += fso.stat.st_size if self._with_metadata_mtime: mtime = datetime.datetime.utcfromtimestamp( @@ -534,7 +617,11 @@ self._algorithm[0], fso.path, use_mmap=self._use_mmap) dir_dgst.update(util.interpolate_bytes( b"%d:%s,", len(dgst), dgst)) - opath = "/".join(top) + "/" + fso.name if top else fso.name + opath = join_output_path(top, fso.name) + if self._utf8_mode: + opath = walk.WalkDirEntry.alt_u8(opath) + else: + opath = walk.WalkDirEntry.alt_fs(opath) if self._size_only: self._outfp.write(format_bsd_line( "SIZE", None, opath, False, fso.stat.st_size)) @@ -548,12 +635,19 @@ self._algorithm[1], dgst, opath, self._use_base64)) self._outfp.flush() - - opath = "/".join(top) + "/" if top else "" + opath = join_output_path(top, None) + if opath: + if self._utf8_mode: + opath = walk.WalkDirEntry.alt_u8(opath) + else: + opath = walk.WalkDirEntry.alt_fs(opath) if self._size_only: self._outfp.write(format_bsd_line( "SIZE", None, opath, False, dir_size)) else: + if dir_tainted: + self._outfp.write(format_bsd_line( + b"ERROR", None, b"directory is tainted", False, None)) if self._print_size: self._outfp.write(format_bsd_line( self._algorithm[1], dir_dgst.digest(), opath, @@ -566,6 +660,27 @@ return (dir_dgst.digest(), dir_size) +def join_output_path(top, name): + if name is None: + # a path for a directory is to be computed + if top: + if isinstance(top[0], bytes): + return b"/".join(top) + b"/" + else: + return u"/".join(top) + u"/" + else: + return b"" + else: + # a path for a normal file is to be computed + if top: + if isinstance(name, bytes): + return b"/".join(top) + b"/" + name + else: + return u"/".join(top) + u"/" + name + else: + return name + + class CRC32Output(object): """Wrapper for a minimal binary file contextmanager that 
calculates @@ -652,9 +767,9 @@ assert filename is None return util.interpolate_bytes(b"%s = %s%s", what, util.b(value), ls) assert filename is not None - if what == b"COMMENT": + if what in (b"COMMENT", b"ERROR"): return util.interpolate_bytes( - b"COMMENT (%s)%s", util.b(filename, "utf-8"), ls) + b"%s (%s)%s", what, util.b(filename, "utf-8"), ls) if not isinstance(filename, bytes): filename = util.fsencode(filename) if what == b"SIZE": @@ -683,11 +798,11 @@ """ - PATTERN0 = re.compile(br"\A[ \t]*\r?\n\Z") # empty lines + PATTERN0 = re.compile(br"\A[ \t]*\r?\n\Z") # empty lines PATTERN1 = re.compile(br"\A(VERSION|FSENCODING|FLAGS|TIMESTAMP|ISOTIMESTAMP|CRC32)[ \t]*=[ \t]*([^ \t]+)[ \t]*\r?\n\Z") # noqa: E501 line too long - PATTERN2 = re.compile(br"\A(ROOT|COMMENT)[ \t]*\((.*)\)[ \t]*\r?\n\Z") - PATTERN3 = re.compile(br"\ASIZE[ \t]*\((.*)\)[ \t]*=[ \t]*(\d+)[ \t]*\r?\n\Z") # noqa: E501 line too long - PATTERN4 = re.compile(br"\A([A-Za-z0-9_-]+)[ \t]*\((.*)\)[ \t]*=[ \t]*([A-Za-z0-9=+/]+)(,(\d+))?[ \t]*\r?\n\Z") # noqa: E501 line too long + PATTERN2 = re.compile(br"\A(ROOT|COMMENT|ERROR)[ \t]*\((.*)\)[ \t]*\r?\n\Z") # noqa: E501 line too long + PATTERN3 = re.compile(br"\ASIZE[ \t]*\((.*)\)[ \t]*=[ \t]*(\d+)[ \t]*\r?\n\Z") # noqa: E501 line too long + PATTERN4 = re.compile(br"\A([A-Za-z0-9_-]+)[ \t]*\((.*)\)[ \t]*=[ \t]*([A-Za-z0-9=+/]+)(,(\d+))?[ \t]*\r?\n\Z") # noqa: E501 line too long def __init__(self, _fp, _filename, _own_fp): self._fp = _fp @@ -812,8 +927,8 @@ mo = self.PATTERN2.search(line) if mo: self._update_crc(line) - if mo.group(1) == b"COMMENT": - return ("COMMENT", util.u(mo.group(2), "utf-8")) + if mo.group(1) in (b"COMMENT", b"ERROR"): + return (util.u(mo.group(1)), util.u(mo.group(2), "utf-8")) elif mo.group(1) == b"ROOT": return ("ROOT", mo.group(2)) assert False, line
