comparison cutils/treesum.py @ 372:bfe1160fbfd3

treesum: Make ERROR outputs more consistent: use native paths where possible
author Franz Glasner <fzglas.hg@dom66.de>
date Sun, 13 Apr 2025 14:15:33 +0200
parents 8a8a43e8369d
children 7044c2900890
comparison
equal deleted inserted replaced
371:29a301ff2501 372:bfe1160fbfd3
739 def _generate(self, root, top): 739 def _generate(self, root, top):
740 if top: 740 if top:
741 logging.debug("Recursing into directory: %s/%r", root, top) 741 logging.debug("Recursing into directory: %s/%r", root, top)
742 else: 742 else:
743 logging.debug("Handling root directory: %s", root) 743 logging.debug("Handling root directory: %s", root)
744 path = os.path.join(root, *top) if top else root 744 fullpath = os.path.join(root, *top) if top else root
745 # Determine also the path to be used for directory filtering 745 # Determine also the path to be used for directory filtering
746 fpath = join_output_path(top, None) if top else "" 746 fpath = join_output_path(top, None) if top else ""
747 if self._fnmatcher: 747 if self._fnmatcher:
748 logging.debug("Checking match against path: %s", fpath)
748 if not self._fnmatcher.shall_visit(fpath): 749 if not self._fnmatcher.shall_visit(fpath):
749 logging.debug("Skipping directory: %s", fpath) 750 logging.debug("Skipping directory: %s", fpath)
750 return (None, None, None, None) 751 return (None, None, None, None)
751 try: 752 try:
752 with walk.ScanDir(path) as dirscan: 753 logging.debug("Scanning directory: %s", fullpath)
754 with walk.ScanDir(fullpath) as dirscan:
753 fsobjects = list(dirscan) 755 fsobjects = list(dirscan)
754 except OSError as e: 756 except OSError as e:
755 # 757 #
756 # NOTE: Sync the error handler code with this method's 758 # NOTE: Sync the error handler code with this method's
757 # code below before returning! 759 # code below before returning!
767 elif e.errno == errno.ENOENT: 769 elif e.errno == errno.ENOENT:
768 # given object does not exist 770 # given object does not exist
769 errmsg = b"no such file or directory" 771 errmsg = b"no such file or directory"
770 else: 772 else:
771 raise 773 raise
772 if self._utf8_mode: 774 self._writer.write_error(
773 opath = walk.WalkDirEntry.alt_u8(path) 775 b"`%s': %s",
774 else: 776 walk.WalkDirEntry.alt_bytes(fullpath, self._utf8_mode),
775 opath = walk.WalkDirEntry.alt_fs(path) 777 errmsg)
776 self._writer.write_error(util.interpolate_bytes(
777 b"`%s': %s", opath, errmsg))
778 # Reuse from top 778 # Reuse from top
779 opath = join_output_path(top, None) 779 opath = walk.WalkDirEntry.alt_bytes(
780 if opath: 780 join_output_path(top, None), self._utf8_mode)
781 if self._utf8_mode:
782 opath = walk.WalkDirEntry.alt_u8(opath)
783 else:
784 opath = walk.WalkDirEntry.alt_fs(opath)
785 if self._size_only: 781 if self._size_only:
786 self._writer.write_size(opath, None) 782 self._writer.write_size(opath, None)
787 else: 783 else:
788 self._writer.write_file_digest( 784 self._writer.write_file_digest(
789 self._algorithm[1], opath, None, None) 785 self._algorithm[1], opath, None, None)
808 eno = errno.ESRCH 804 eno = errno.ESRCH
809 emsg = str(e) 805 emsg = str(e)
810 else: 806 else:
811 eno = 0 807 eno = 0
812 emsg = None 808 emsg = None
813 if self._utf8_mode: 809 opath = walk.WalkDirEntry.alt_bytes(
814 fpath = walk.WalkDirEntry.alt_u8(fpath) 810 join_output_path(top, None), self._utf8_mode)
815 else: 811 fpath = walk.WalkDirEntry.alt_bytes(
816 fpath = walk.WalkDirEntry.alt_fs(fpath) 812 fpath, self._utf8_mode)
817 opath = join_output_path(top, None)
818 if self._utf8_mode:
819 opath = walk.WalkDirEntry.alt_u8(opath)
820 else:
821 opath = walk.WalkDirEntry.alt_fs(opath)
822 if eno == 0: 813 if eno == 0:
823 # 814 #
824 # treesum file could be read. 815 # treesum file could be read.
825 # Now check whether the infos we got from it are 816 # Now check whether the infos we got from it are
826 # compatible with our current requirements 817 # compatible with our current requirements
830 if collector.size is None: 821 if collector.size is None:
831 # 822 #
832 # This is a severe error here: just the size 823 # This is a severe error here: just the size
833 # is required, but we have not got one. 824 # is required, but we have not got one.
834 # 825 #
835 self._writer.write_error(util.b( 826 self._writer.write_error(
836 util.interpolate_bytes( 827 b"Missing required size in treesum-file"
837 b"No size in treesum-file `%s' while" 828 b" `%s'",
838 b" requiring it", 829 walk.WalkDirEntry.alt_bytes(
839 fpath), 830 fso.npath, self._utf8_mode))
840 "utf-8"))
841 self._writer.write_size(opath, None) 831 self._writer.write_size(opath, None)
842 return (errno.ESRCH, None, None, None) 832 return (errno.ESRCH, None, None, None)
843 else: 833 else:
844 if self._print_size: 834 if self._print_size:
845 if collector.size is None: 835 if collector.size is None:
846 # 836 #
847 # XXX FIXME: Is this a **severe** error 837 # XXX FIXME: Is this a **severe** error
848 # here? Currently: no 838 # here? Currently: no
849 # 839 #
850 self._writer.write_error(util.b( 840 self._writer.write_error(
851 util.interpolate_bytes( 841 b"Missing size in treesum-file `%s'",
852 b"No size in treesum-file `%s'", 842 walk.WalkDirEntry.alt_bytes(
853 fpath), 843 fso.npath, self._utf8_mode))
854 "utf-8"))
855 sz = -1 844 sz = -1
856 else: 845 else:
857 sz = collector.size 846 sz = collector.size
858 else: 847 else:
859 sz = None 848 sz = None
861 # 850 #
862 # This is really a severe error. Most probably 851 # This is really a severe error. Most probably
863 # the treesum file was created with 852 # the treesum file was created with
864 # "--size-only" and contains no digest. 853 # "--size-only" and contains no digest.
865 # 854 #
866 self._writer.write_error(util.b( 855 self._writer.write_error(
867 util.interpolate_bytes( 856 b"Missing required digest in treesum-file"
868 b"No digest in treesum-file `%s' while" 857 b" `%s'",
869 b" it is required", 858 walk.WalkDirEntry.alt_bytes(
870 fpath), 859 fso.npath, self._utf8_mode))
871 "utf-8"))
872 self._writer.write_file_digest( 860 self._writer.write_file_digest(
873 collector.algorithm or "MD5", 861 collector.algorithm or "MD5",
874 opath, 862 opath,
875 None, 863 None,
876 sz) 864 sz)
889 collector.size) 877 collector.size)
890 else: 878 else:
891 # 879 #
892 # treesum file could not be read 880 # treesum file could not be read
893 # 881 #
894 self._writer.write_error(util.interpolate_bytes( 882 self._writer.write_error(
895 b"Cannot read treesum-file `%s' for directory" 883 b"Cannot read treesum-file `%s' for directory"
896 b"`%s': %s", 884 b"`%s': %s",
897 fpath, 885 walk.WalkDirEntry.alt_bytes(
898 opath, 886 fso.npath, self._utf8_mode),
899 util.b(emsg, "utf-8"))) 887 walk.WalkDirEntry.alt_u8(
888 join_output_path(top, None)),
889 util.b(emsg, "utf-8", "backslashreplace"))
900 if self._size_only: 890 if self._size_only:
901 self._writer.write_size(opath, None) 891 self._writer.write_size(opath, None)
902 else: 892 else:
903 self._writer.write_file_digest( 893 self._writer.write_file_digest(
904 self._algorithm[1], opath, None, None) 894 self._algorithm[1], opath, None, None)
933 fpath = opath 923 fpath = opath
934 if self._fnmatcher: 924 if self._fnmatcher:
935 if not self._fnmatcher.shall_visit(fpath): 925 if not self._fnmatcher.shall_visit(fpath):
936 logging.debug("Skipping: %s", fpath) 926 logging.debug("Skipping: %s", fpath)
937 continue 927 continue
938 if self._utf8_mode: 928 opath = walk.WalkDirEntry.alt_bytes(opath, self._utf8_mode)
939 opath = walk.WalkDirEntry.alt_u8(opath)
940 else:
941 opath = walk.WalkDirEntry.alt_fs(opath)
942 if fso.is_special: 929 if fso.is_special:
943 special_tag = util.b(fso.special_tag) 930 special_tag = util.b(fso.special_tag)
931 assert len(special_tag) == 1
944 assert fso.stat is not None # because .is_special is True 932 assert fso.stat is not None # because .is_special is True
945 if fso.is_symlink and not self._follow_symlinks.file: 933 if fso.is_symlink and not self._follow_symlinks.file:
946 linktgt = walk.WalkDirEntry.from_readlink( 934 linktgt = walk.WalkDirEntry.from_readlink(
947 os.readlink(fso.path)) 935 os.readlink(fso.npath))
948 linkdgst = self._algorithm[0]() 936 linkdgst = self._algorithm[0]()
949 if self._utf8_mode: 937 if self._utf8_mode:
950 if linktgt.u8path is None: 938 if linktgt.u8path is None:
951 dir_tainted = True 939 dir_tainted = True
952 linkdgst.update(linktgt.alt_u8path) 940 linkdgst.update(linktgt.alt_u8path)
1026 sz) 1014 sz)
1027 elif fso.is_dir: 1015 elif fso.is_dir:
1028 assert fso.stat is not None # because .is_dir is True 1016 assert fso.stat is not None # because .is_dir is True
1029 if fso.is_symlink and not self._follow_symlinks.directory: 1017 if fso.is_symlink and not self._follow_symlinks.directory:
1030 linktgt = walk.WalkDirEntry.from_readlink( 1018 linktgt = walk.WalkDirEntry.from_readlink(
1031 os.readlink(fso.path)) 1019 os.readlink(fso.npath))
1032 linkdgst = self._algorithm[0]() 1020 linkdgst = self._algorithm[0]()
1033 if self._utf8_mode: 1021 if self._utf8_mode:
1034 if linktgt.u8path is None: 1022 if linktgt.u8path is None:
1035 dir_tainted = True 1023 dir_tainted = True
1036 linkdgst.update(linktgt.alt_u8path) 1024 linkdgst.update(linktgt.alt_u8path)
1119 # Symbolic link to some filesystem object which is not 1107 # Symbolic link to some filesystem object which is not
1120 # determined to be a link to a directory or some other 1108 # determined to be a link to a directory or some other
1121 # special file (socket, FIFO, et al.). 1109 # special file (socket, FIFO, et al.).
1122 # 1110 #
1123 linktgt = walk.WalkDirEntry.from_readlink( 1111 linktgt = walk.WalkDirEntry.from_readlink(
1124 os.readlink(fso.path)) 1112 os.readlink(fso.npath))
1125 linkdgst = self._algorithm[0]() 1113 linkdgst = self._algorithm[0]()
1126 if self._utf8_mode: 1114 if self._utf8_mode:
1127 if linktgt.u8path is None: 1115 if linktgt.u8path is None:
1128 dir_tainted = True 1116 dir_tainted = True
1129 linkdgst.update(linktgt.alt_u8path) 1117 linkdgst.update(linktgt.alt_u8path)
1174 dir_tainted = True 1162 dir_tainted = True
1175 dir_dgst.update(util.interpolate_bytes( 1163 dir_dgst.update(util.interpolate_bytes(
1176 b"5:errno,%d:%s,", 1164 b"5:errno,%d:%s,",
1177 len(str(fso.stat_errno)), 1165 len(str(fso.stat_errno)),
1178 util.b(str(fso.stat_errno)))) 1166 util.b(str(fso.stat_errno))))
1179 self._writer.write_error(util.interpolate_bytes( 1167 self._writer.write_error(
1180 b"errno %d: %s", 1168 b"errno %d: %s",
1181 fso.stat_errno, 1169 fso.stat_errno,
1182 util.b(fso.stat_errstr, "utf-8"))) 1170 util.b(util.escape_for_output(fso.stat_errstr),
1171 "utf-8",
1172 "backslashreplace"))
1183 logging.error( 1173 logging.error(
1184 "Directory entry has symlink problems: %r", 1174 "Directory entry has symlink problems: %s",
1185 opath) 1175 fso.npath)
1186 if self._size_only: 1176 if self._size_only:
1187 self._writer.write_size(opath, None) 1177 self._writer.write_size(opath, None)
1188 else: 1178 else:
1189 self._writer.write_file_digest( 1179 self._writer.write_file_digest(
1190 self._algorithm[1], opath, None, None) 1180 self._algorithm[1], opath, None, None)
1220 self._writer.write_size(opath, fso.stat.st_size) 1210 self._writer.write_size(opath, fso.stat.st_size)
1221 else: 1211 else:
1222 try: 1212 try:
1223 dgst = digest.compute_digest_file( 1213 dgst = digest.compute_digest_file(
1224 self._algorithm[0], 1214 self._algorithm[0],
1225 fso.path, 1215 fso.npath,
1226 use_mmap=self._use_mmap) 1216 use_mmap=self._use_mmap)
1227 except OSError as e: 1217 except OSError as e:
1228 dir_tainted = True 1218 dir_tainted = True
1229 self._writer.write_error( 1219 self._writer.write_error(
1230 util.interpolate_bytes( 1220 b"`%s': errno %d: %s",
1231 b"`%s': errno %d: %s", 1221 walk.WalkDirEntry.alt_bytes(
1232 opath, 1222 fso.npath, self._utf8_mode),
1233 e.errno, 1223 e.errno,
1234 util.b(e.strerror, "utf-8"))) 1224 util.b(util.escape_for_output(e.strerror),
1225 "utf-8",
1226 "backslashreplace"))
1235 sz = (fso.stat.st_size if self._print_size 1227 sz = (fso.stat.st_size if self._print_size
1236 else None) 1228 else None)
1237 self._writer.write_file_digest( 1229 self._writer.write_file_digest(
1238 self._algorithm[1], opath, None, sz) 1230 self._algorithm[1], opath, None, sz)
1239 else: 1231 else:
1246 sz = (fso.stat.st_size if self._print_size 1238 sz = (fso.stat.st_size if self._print_size
1247 else None) 1239 else None)
1248 self._writer.write_file_digest( 1240 self._writer.write_file_digest(
1249 self._algorithm[1], opath, dgst, sz) 1241 self._algorithm[1], opath, dgst, sz)
1250 self._writer.flush() 1242 self._writer.flush()
1251 opath = join_output_path(top, None)
1252 if opath:
1253 if self._utf8_mode:
1254 opath = walk.WalkDirEntry.alt_u8(opath)
1255 else:
1256 opath = walk.WalkDirEntry.alt_fs(opath)
1257 if dir_tainted: 1243 if dir_tainted:
1258 # 1244 #
1259 # IMPORTANT: Print errors BEFORE the associated digest or size 1245 # IMPORTANT: Print errors BEFORE the associated digest or size
1260 # line. Otherwise the "info" command has a problem. 1246 # line. Otherwise the "info" command has a problem.
1261 # 1247 #
1262 self._writer.write_error(b"directory is tainted") 1248 self._writer.write_error(b"%s", b"directory is tainted")
1263 logging.error("Directory has problems: %r", opath) 1249 logging.error("Directory has problems: %s", fullpath)
1250 opath = walk.WalkDirEntry.alt_bytes(
1251 join_output_path(top, None), self._utf8_mode)
1264 if self._size_only: 1252 if self._size_only:
1265 self._writer.write_size(opath, dir_size) 1253 self._writer.write_size(opath, dir_size)
1266 else: 1254 else:
1267 sz = dir_size if self._print_size else None 1255 sz = dir_size if self._print_size else None
1268 self._writer.write_file_digest( 1256 self._writer.write_file_digest(
1453 self.write(b"VERSION = ") 1441 self.write(b"VERSION = ")
1454 self.writeln(util.b(version)) 1442 self.writeln(util.b(version))
1455 1443
1456 def write_comment(self, comment): 1444 def write_comment(self, comment):
1457 self.write(b"COMMENT (") 1445 self.write(b"COMMENT (")
1458 self.write(util.b(comment, "utf-8")) 1446 comment = util.escape_for_output(comment)
1447 self.write(util.b(comment, "utf-8", "backslashreplace"))
1459 self.writeln(b")") 1448 self.writeln(b")")
1460 1449
1461 def write_generator(self, generator): 1450 def write_generator(self, generator):
1462 self.write(b"GENERATOR (") 1451 self.write(b"GENERATOR (")
1463 self.write(util.b(generator, "utf-8")) 1452 self.write(util.b(generator, "utf-8"))
1464 self.writeln(b")") 1453 self.writeln(b")")
1465 1454
1466 def write_error(self, error): 1455 def write_error(self, fmt, *args):
1467 self.write(b"ERROR (") 1456 self.write(b"ERROR (")
1468 self.write(util.b(error, "utf-8")) 1457 self.write(util.interpolate_bytes(fmt, *args))
1469 self.writeln(b")") 1458 self.writeln(b")")
1470 1459
1471 def write_fsencoding(self, encoding): 1460 def write_fsencoding(self, encoding):
1472 self.write(b"FSENCODING = ") 1461 self.write(b"FSENCODING = ")
1473 self.writeln(util.b(encoding)) 1462 self.writeln(util.b(encoding))
1589 self.write(b"VERSION\t") 1578 self.write(b"VERSION\t")
1590 self.writeln(util.b(version)) 1579 self.writeln(util.b(version))
1591 1580
1592 def write_comment(self, comment): 1581 def write_comment(self, comment):
1593 self.write(b"COMMENT\t") 1582 self.write(b"COMMENT\t")
1594 self.writeln(util.b(comment, "utf-8")) 1583 comment = util.escape_for_output(comment)
1584 self.writeln(util.b(comment, "utf-8", "backslashreplace"))
1595 1585
1596 def write_generator(self, generator): 1586 def write_generator(self, generator):
1597 self.write(b"GENERATOR\t") 1587 self.write(b"GENERATOR\t")
1598 self.writeln(util.b(generator, "utf-8")) 1588 self.writeln(util.b(generator, "utf-8"))
1599 1589
1600 def write_error(self, error): 1590 def write_error(self, fmt, *args):
1601 self.write(b"ERROR\t") 1591 self.write(b"ERROR\t")
1602 self.writeln(util.b(error, "utf-8")) 1592 self.writeln(util.interpolate_bytes(fmt, *args))
1603 1593
1604 def write_fsencoding(self, encoding): 1594 def write_fsencoding(self, encoding):
1605 self.write(b"FSENCODING\t") 1595 self.write(b"FSENCODING\t")
1606 self.writeln(util.b(encoding)) 1596 self.writeln(util.b(encoding))
1607 1597