Mercurial > hgrepos > Python > apps > py-cutils
comparison cutils/treesum.py @ 266:0add8276e6b8
treesum: Handle errors like broken symlinks properly
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Tue, 18 Feb 2025 12:39:04 +0100 |
| parents | c3d6599c1b5e |
| children | b9aa65a30b4c |
comparison
equal
deleted
inserted
replaced
| 265:188f448ab5e9 | 266:0add8276e6b8 |
|---|---|
| 16 import argparse | 16 import argparse |
| 17 import base64 | 17 import base64 |
| 18 import binascii | 18 import binascii |
| 19 import collections | 19 import collections |
| 20 import datetime | 20 import datetime |
| | 21 import errno |
| 21 import logging | 22 import logging |
| 22 import os | 23 import os |
| 23 import re | 24 import re |
| 24 import stat | 25 import stat |
| 25 import sys | 26 import sys |
| 574 self._writer.finish() | 575 self._writer.finish() |
| 575 | 576 |
| 576 def _generate(self, root, top): | 577 def _generate(self, root, top): |
| 577 logging.debug("Handling %s/%r", root, top) | 578 logging.debug("Handling %s/%r", root, top) |
| 578 path = os.path.join(root, *top) if top else root | 579 path = os.path.join(root, *top) if top else root |
| 579 with walk.ScanDir(path) as dirscan: | 580 try: |
| 580 fsobjects = list(dirscan) | 581 with walk.ScanDir(path) as dirscan: |
| | 582 fsobjects = list(dirscan) |
| | 583 except OSError as e: |
| | 584 if self._utf8_mode: |
| | 585 opath = walk.WalkDirEntry.alt_u8(path) |
| | 586 else: |
| | 587 opath = walk.WalkDirEntry.alt_fs(path) |
| | 588 if e.errno == errno.ENOTDIR: |
| | 589 # object exists but is not a directory |
| | 590 errmsg = b"not a directory" |
| | 591 elif e.errno in (errno.EACCES, errno.EPERM, |
| | 592 getattr(errno, "ENOTCAPABLE", errno.EACCES)): |
| | 593 # no permissions |
| | 594 errmsg = ( |
| | 595 b"access denied / no permissions / missing capabilities") |
| | 596 elif e.errno == errno.ENOENT: |
| | 597 # given object does not exist |
| | 598 errmsg = b"no such file or directory" |
| | 599 else: |
| | 600 raise |
| | 601 self._writer.write_error(util.interpolate_bytes( |
| | 602 b"`%s': %s", opath, errmsg)) |
| | 603 opath = join_output_path(top, None) |
| | 604 if opath: |
| | 605 if self._utf8_mode: |
| | 606 opath = walk.WalkDirEntry.alt_u8(opath) |
| | 607 else: |
| | 608 opath = walk.WalkDirEntry.alt_fs(opath) |
| | 609 if self._size_only: |
| | 610 self._writer.write_size(opath, None) |
| | 611 else: |
| | 612 self._writer.write_file_digest(self._algorithm[1], opath, None) |
| | 613 self._writer.flush() |
| | 614 return (None, None) |
| 581 if self._utf8_mode: | 615 if self._utf8_mode: |
| 582 fsobjects.sort(key=walk.WalkDirEntry.sort_key_u8) | 616 fsobjects.sort(key=walk.WalkDirEntry.sort_key_u8) |
| 583 else: | 617 else: |
| 584 fsobjects.sort(key=walk.WalkDirEntry.sort_key_fs) | 618 fsobjects.sort(key=walk.WalkDirEntry.sort_key_fs) |
| 585 dir_dgst = self._algorithm[0]() | 619 dir_dgst = self._algorithm[0]() |
| 664 | 698 |
| 665 # Get subdir data from recursing into it | 699 # Get subdir data from recursing into it |
| 666 sub_dir_dgst, sub_dir_size = self._generate( | 700 sub_dir_dgst, sub_dir_size = self._generate( |
| 667 root, top + (fso.name, )) | 701 root, top + (fso.name, )) |
| 668 | 702 |
| | 703 if sub_dir_dgst is None or sub_dir_size is None: |
| | 704 # |
| | 705 # This should not happen: |
| | 706 # - top-level directories are handled above |
| | 707 # - other filesystem objects should also have been |
| | 708 # handled already |
| | 709 # |
| | 710 assert False |
| | 711 |
| 669 dir_size += sub_dir_size | 712 dir_size += sub_dir_size |
| 670 if self._utf8_mode: | 713 if self._utf8_mode: |
| 671 if fso.u8name is None: | 714 if fso.u8name is None: |
| 672 dir_tainted = True | 715 dir_tainted = True |
| 673 dir_dgst.update(util.interpolate_bytes( | 716 dir_dgst.update(util.interpolate_bytes( |
| 792 len(fso.alt_fsname), | 835 len(fso.alt_fsname), |
| 793 fso.alt_fsname)) | 836 fso.alt_fsname)) |
| 794 else: | 837 else: |
| 795 dir_dgst.update(util.interpolate_bytes( | 838 dir_dgst.update(util.interpolate_bytes( |
| 796 b"1:f,%d:%s,", len(fso.fsname), fso.fsname)) | 839 b"1:f,%d:%s,", len(fso.fsname), fso.fsname)) |
| 797 dir_size += fso.stat.st_size | |
| 798 if self._with_metadata_mtime: | |
| 799 mtime = datetime.datetime.utcfromtimestamp( | |
| 800 int(fso.stat.st_mtime)) | |
| 801 mtime = util.b(mtime.isoformat("T") + "Z") | |
| 802 dir_dgst.update(util.interpolate_bytes( | |
| 803 b"5:mtime,%d:%s,", len(mtime), mtime)) | |
| 804 if self._with_metadata_full_mode: | |
| 805 modestr = util.b(normalized_mode_str(fso.stat.st_mode)) | |
| 806 dir_dgst.update(util.interpolate_bytes( | |
| 807 b"8:fullmode,%d:%s,", len(modestr), modestr)) | |
| 808 elif self._with_metadata_mode: | |
| 809 modestr = util.b(normalized_compatible_mode_str( | |
| 810 fso.stat.st_mode)) | |
| 811 dir_dgst.update(util.interpolate_bytes( | |
| 812 b"4:mode,%d:%s,", len(modestr), modestr)) | |
| 813 if not self._size_only: | |
| 814 dgst = digest.compute_digest_file( | |
| 815 self._algorithm[0], | |
| 816 fso.path, | |
| 817 use_mmap=self._use_mmap) | |
| 818 dir_dgst.update(util.interpolate_bytes( | |
| 819 b"%d:%s,", len(dgst), dgst)) | |
| 820 opath = join_output_path(top, fso.name) | 840 opath = join_output_path(top, fso.name) |
| 821 if self._utf8_mode: | 841 if self._utf8_mode: |
| 822 opath = walk.WalkDirEntry.alt_u8(opath) | 842 opath = walk.WalkDirEntry.alt_u8(opath) |
| 823 else: | 843 else: |
| 824 opath = walk.WalkDirEntry.alt_fs(opath) | 844 opath = walk.WalkDirEntry.alt_fs(opath) |
| 825 if self._size_only: | 845 if fso.stat is None: |
| 826 self._writer.write_size(opath, fso.stat.st_size) | 846 # |
| | 847 # Error: most likely a broken symlink here |
| | 848 # |
| | 849 dir_tainted = True |
| | 850 dir_dgst.update(util.interpolate_bytes( |
| | 851 b"5:errno,%d:%s,", |
| | 852 len(str(fso.stat_errno)), |
| | 853 util.b(str(fso.stat_errno)))) |
| | 854 self._writer.write_error(util.interpolate_bytes( |
| | 855 b"errno %d: %s", |
| | 856 fso.stat_errno, |
| | 857 util.b(fso.stat_errstr, "utf-8"))) |
| | 858 logging.error( |
| | 859 "Directory entry has symlink problems: %r", |
| | 860 opath) |
| | 861 if self._size_only: |
| | 862 self._writer.write_size(opath, None) |
| | 863 else: |
| | 864 self._writer.write_file_digest( |
| | 865 self._algorithm[1], opath, None) |
| 827 else: | 866 else: |
| 828 sz = fso.stat.st_size if self._print_size else None | 867 # |
| 829 self._writer.write_file_digest( | 868 # Ok: File has normal stat info |
| 830 self._algorithm[1], opath, dgst, | 869 # |
| 831 use_base64=self._use_base64, | 870 # XXX FIXME: Handle special files (fifo, socket, |
| 832 size=sz) | 871 # block or char devices, ...). |
| | 872 # |
| | 873 dir_size += fso.stat.st_size |
| | 874 if self._with_metadata_mtime: |
| | 875 mtime = datetime.datetime.utcfromtimestamp( |
| | 876 int(fso.stat.st_mtime)) |
| | 877 mtime = util.b(mtime.isoformat("T") + "Z") |
| | 878 dir_dgst.update(util.interpolate_bytes( |
| | 879 b"5:mtime,%d:%s,", len(mtime), mtime)) |
| | 880 if self._with_metadata_full_mode: |
| | 881 modestr = util.b( |
| | 882 normalized_mode_str(fso.stat.st_mode)) |
| | 883 dir_dgst.update(util.interpolate_bytes( |
| | 884 b"8:fullmode,%d:%s,", len(modestr), modestr)) |
| | 885 elif self._with_metadata_mode: |
| | 886 modestr = util.b(normalized_compatible_mode_str( |
| | 887 fso.stat.st_mode)) |
| | 888 dir_dgst.update(util.interpolate_bytes( |
| | 889 b"4:mode,%d:%s,", len(modestr), modestr)) |
| | 890 if not self._size_only: |
| | 891 dgst = digest.compute_digest_file( |
| | 892 self._algorithm[0], |
| | 893 fso.path, |
| | 894 use_mmap=self._use_mmap) |
| | 895 dir_dgst.update(util.interpolate_bytes( |
| | 896 b"%d:%s,", len(dgst), dgst)) |
| | 897 if self._size_only: |
| | 898 self._writer.write_size(opath, fso.stat.st_size) |
| | 899 else: |
| | 900 sz = fso.stat.st_size if self._print_size else None |
| | 901 self._writer.write_file_digest( |
| | 902 self._algorithm[1], opath, dgst, |
| | 903 use_base64=self._use_base64, |
| | 904 size=sz) |
| 833 self._writer.flush() | 905 self._writer.flush() |
| 834 opath = join_output_path(top, None) | 906 opath = join_output_path(top, None) |
| 835 if opath: | 907 if opath: |
| 836 if self._utf8_mode: | 908 if self._utf8_mode: |
| 837 opath = walk.WalkDirEntry.alt_u8(opath) | 909 opath = walk.WalkDirEntry.alt_u8(opath) |
| 838 else: | 910 else: |
| 839 opath = walk.WalkDirEntry.alt_fs(opath) | 911 opath = walk.WalkDirEntry.alt_fs(opath) |
| | 912 if dir_tainted: |
| | 913 # |
| | 914 # IMPORTANT: Print errors BEFORE the associated digest or size |
| | 915 # line. Otherwise the "info" command has a problem. |
| | 916 # |
| | 917 self._writer.write_error(b"directory is tainted") |
| | 918 logging.error("Directory has filename and/or symlink problems: %r", |
| | 919 opath) |
| 840 if self._size_only: | 920 if self._size_only: |
| 841 self._writer.write_size(opath, dir_size) | 921 self._writer.write_size(opath, dir_size) |
| 842 else: | 922 else: |
| 843 if dir_tainted: | |
| 844 # | |
| 845 # IMPORTANT: Print errors BEFORE the associated digest line. | |
| 846 # Otherwise the "info" command has a problem. | |
| 847 # | |
| 848 self._writer.write_error(b"directory is tainted") | |
| 849 logging.error("Directory has filename problems: %r", opath) | |
| 850 sz = dir_size if self._print_size else None | 923 sz = dir_size if self._print_size else None |
| 851 self._writer.write_file_digest( | 924 self._writer.write_file_digest( |
| 852 self._algorithm[1], opath, dir_dgst.digest(), | 925 self._algorithm[1], opath, dir_dgst.digest(), |
| 853 use_base64=self._use_base64, size=sz) | 926 use_base64=self._use_base64, size=sz) |
| 854 self._writer.flush() | 927 self._writer.flush() |
| 1012 | 1085 |
| 1013 def write_size(self, filename, sz): | 1086 def write_size(self, filename, sz): |
| 1014 assert isinstance(filename, bytes) | 1087 assert isinstance(filename, bytes) |
| 1015 self.write(b"SIZE (") | 1088 self.write(b"SIZE (") |
| 1016 self.write(filename) | 1089 self.write(filename) |
| 1017 self.write(b") = ") | 1090 self.write(b")") |
| 1018 self.writeln(util.b(str(sz))) | 1091 if sz is not None: |
| | 1092 self.write(b" = ") |
| | 1093 self.write(util.b(str(sz))) |
| | 1094 self.writeln(b"") |
| 1019 | 1095 |
| 1020 def write_file_digest(self, algorithm, filename, digest, | 1096 def write_file_digest(self, algorithm, filename, digest, |
| 1021 use_base64=False, size=None): | 1097 use_base64=False, size=None): |
| 1022 digest = (base64.b64encode(digest) | 1098 if digest is not None: |
| 1023 if use_base64 | 1099 digest = (base64.b64encode(digest) |
| 1024 else binascii.hexlify(digest)) | 1100 if use_base64 |
| | 1101 else binascii.hexlify(digest)) |
| 1025 if filename != b"./@/": | 1102 if filename != b"./@/": |
| 1026 filename = util.normalize_filename(filename, True) | 1103 filename = util.normalize_filename(filename, True) |
| 1027 self.write(util.b(algorithm)) | 1104 self.write(util.b(algorithm)) |
| 1028 self.write(b" (") | 1105 self.write(b" (") |
| 1029 self.write(filename) | 1106 self.write(filename) |
| 1030 self.write(b") = ") | 1107 self.write(b")") |
| 1031 self.write(digest) | 1108 if digest is not None or size is not None: |
| 1032 if size is not None: | 1109 self.write(b" = ") |
| 1033 self.write(b",") | 1110 if digest is not None: |
| 1034 self.writeln(util.b(str(size))) | 1111 self.write(digest) |
| 1035 else: | 1112 if size is not None: |
| 1036 self.writeln(b"") | 1113 self.write(b",") |
| | 1114 self.write(util.b(str(size))) |
| | 1115 self.writeln(b"") |
| 1037 | 1116 |
| 1038 def finish(self): | 1117 def finish(self): |
| 1039 """Finish a block and write the current CRC""" | 1118 """Finish a block and write the current CRC""" |
| 1040 crc = self._crc.hexdigest() | 1119 crc = self._crc.hexdigest() |
| 1041 self.write(b"CRC32 = ") | 1120 self.write(b"CRC32 = ") |
| 1074 """ | 1153 """ |
| 1075 | 1154 |
| 1076 PATTERN0 = re.compile(br"\A[ \t]*\r?\n\Z") # empty lines | 1155 PATTERN0 = re.compile(br"\A[ \t]*\r?\n\Z") # empty lines |
| 1077 PATTERN1 = re.compile(br"\A(VERSION|FSENCODING|FLAGS|TIMESTAMP|ISOTIMESTAMP|CRC32)[ \t]*=[ \t]*([^ \t]+)[ \t]*\r?\n\Z") # noqa: E501 line too long | 1156 PATTERN1 = re.compile(br"\A(VERSION|FSENCODING|FLAGS|TIMESTAMP|ISOTIMESTAMP|CRC32)[ \t]*=[ \t]*([^ \t]+)[ \t]*\r?\n\Z") # noqa: E501 line too long |
| 1078 PATTERN2 = re.compile(br"\A(ROOT|COMMENT|ERROR|GENERATOR)[ \t]*\((.*)\)[ \t]*\r?\n\Z") # noqa: E501 line too long | 1157 PATTERN2 = re.compile(br"\A(ROOT|COMMENT|ERROR|GENERATOR)[ \t]*\((.*)\)[ \t]*\r?\n\Z") # noqa: E501 line too long |
| 1079 PATTERN3 = re.compile(br"\ASIZE[ \t]*\((.*)\)[ \t]*=[ \t]*(\d+)[ \t]*\r?\n\Z") # noqa: E501 line too long | 1158 PATTERN3 = re.compile(br"\ASIZE[ \t]*\((.*)\)([ \t]*=[ \t]*(\d+))?[ \t]*\r?\n\Z") # noqa: E501 line too long |
| 1080 PATTERN4 = re.compile(br"\A([A-Za-z0-9_-]+)[ \t]*\((.*)\)[ \t]*=[ \t]*([A-Za-z0-9=+/]+)(,(\d+))?[ \t]*\r?\n\Z") # noqa: E501 line too long | 1159 PATTERN4 = re.compile(br"\A([A-Za-z0-9_-]+)[ \t]*\((.*)\)([ \t]*=[ \t]*([A-Za-z0-9=+/]+)(,(\d+))?)?[ \t]*\r?\n\Z") # noqa: E501 line too long |
| 1081 | 1160 |
| 1082 def __init__(self, _fp, _filename, _own_fp): | 1161 def __init__(self, _fp, _filename, _own_fp): |
| 1083 self._fp = _fp | 1162 self._fp = _fp |
| 1084 self._own_fp = _own_fp | 1163 self._own_fp = _own_fp |
| 1085 self._filename = _filename | 1164 self._filename = _filename |
| 1209 assert False, line | 1288 assert False, line |
| 1210 else: | 1289 else: |
| 1211 mo = self.PATTERN3.search(line) | 1290 mo = self.PATTERN3.search(line) |
| 1212 if mo: | 1291 if mo: |
| 1213 self._update_crc(line) | 1292 self._update_crc(line) |
| 1214 return ("SIZE", mo.group(1), int(util.n(mo.group(2)), 10)) | 1293 if mo.group(2): |
| | 1294 return ("SIZE", mo.group(1), |
| | 1295 int(util.n(mo.group(3)), 10)) |
| | 1296 else: |
| | 1297 return ("SIZE", mo.group(1), None) |
| 1215 else: | 1298 else: |
| 1216 mo = self.PATTERN4.search(line) | 1299 mo = self.PATTERN4.search(line) |
| 1217 if mo: | 1300 if mo: |
| 1218 self._update_crc(line) | 1301 self._update_crc(line) |
| 1219 algo_name = util.n(mo.group(1)) | 1302 algo_name = util.n(mo.group(1)) |
| 1220 if (len(mo.group(3)) == | 1303 if mo.group(3): |
| 1221 2 * self._get_digest_size(algo_name)): | 1304 if (len(mo.group(4)) == |
| 1222 # hex | 1305 2 * self._get_digest_size(algo_name)): |
| 1223 digest = binascii.unhexlify(mo.group(3)) | 1306 # hex |
| 1224 else: | 1307 digest = binascii.unhexlify(mo.group(4)) |
| 1225 # base64 | 1308 else: |
| 1226 digest = base64.b64decode(mo.group(3)) | 1309 # base64 |
| 1227 if mo.group(4): | 1310 digest = base64.b64decode(mo.group(4)) |
| 1228 size = int(util.n(mo.group(5)), 10) | 1311 if mo.group(5): |
| 1229 else: | 1312 size = int(util.n(mo.group(6)), 10) |
| 1230 size = None | 1313 else: |
| 1231 return (algo_name, mo.group(2), digest, size) | 1314 size = None |
| | 1315 return (algo_name, mo.group(2), digest, size) |
| | 1316 else: |
| | 1317 return (algo_name, mo.group(2), None, None) |
| 1232 else: | 1318 else: |
| 1233 assert False, line | 1319 assert False, line |
| 1234 return line | 1320 return line |
| 1235 | 1321 |
| 1236 def _get_next_line(self): | 1322 def _get_next_line(self): |
