comparison cutils/treesum.py @ 275:c72f5b2dbc6f

treesum: Simplify the path computations and make the visible output more consistent. Current format of digest lines: - No `=' if there was a severe error so that digest and/or size could not be computed. - Empty fields for digest and/or size if they were not computed on purpose: * for symbolic links * for special files
author Franz Glasner <fzglas.hg@dom66.de>
date Thu, 20 Feb 2025 15:30:51 +0100
parents 6fe88de236cb
children 9676ecd32a07
comparison
equal deleted inserted replaced
274:224725fd9f2f 275:c72f5b2dbc6f
564 util.interpolate_bytes( 564 util.interpolate_bytes(
565 b"%d:%s,%d:%s,", 565 b"%d:%s,%d:%s,",
566 len(self._algorithm[1]), util.b(self._algorithm[1]), 566 len(self._algorithm[1]), util.b(self._algorithm[1]),
567 len(linkdgst.digest()), linkdgst.digest())) 567 len(linkdgst.digest()), linkdgst.digest()))
568 if self._size_only: 568 if self._size_only:
569 self._writer.write_size(b"./@/", 0) 569 self._writer.write_size(b"./@/", b"")
570 else: 570 else:
571 sz = b"" if self._print_size else None
571 self._writer.write_file_digest( 572 self._writer.write_file_digest(
572 self._algorithm[1], 573 self._algorithm[1],
573 b"./@/", 574 b"./@/",
574 dir_dgst.digest(), 575 dir_dgst.digest(),
575 self._use_base64) 576 self._use_base64,
577 size=sz)
576 self._writer.flush() 578 self._writer.flush()
577 else: 579 else:
578 self._generate(os.path.normpath(root), tuple()) 580 self._generate(os.path.normpath(root), tuple())
579 self._writer.finish() 581 self._writer.finish()
580 582
583 path = os.path.join(root, *top) if top else root 585 path = os.path.join(root, *top) if top else root
584 try: 586 try:
585 with walk.ScanDir(path) as dirscan: 587 with walk.ScanDir(path) as dirscan:
586 fsobjects = list(dirscan) 588 fsobjects = list(dirscan)
587 except OSError as e: 589 except OSError as e:
588 if self._utf8_mode: 590 #
589 opath = walk.WalkDirEntry.alt_u8(path) 591 # NOTE: Sync the error handler code with this method's
590 else: 592 # code below before returning!
591 opath = walk.WalkDirEntry.alt_fs(path) 593 #
592 if e.errno == errno.ENOTDIR: 594 if e.errno == errno.ENOTDIR:
593 # object exists but is not a directory 595 # object exists but is not a directory
594 errmsg = b"not a directory" 596 errmsg = b"not a directory"
595 elif e.errno in (errno.EACCES, errno.EPERM, 597 elif e.errno in (errno.EACCES, errno.EPERM,
596 getattr(errno, "ENOTCAPABLE", errno.EACCES)): 598 getattr(errno, "ENOTCAPABLE", errno.EACCES)):
600 elif e.errno == errno.ENOENT: 602 elif e.errno == errno.ENOENT:
601 # given object does not exist 603 # given object does not exist
602 errmsg = b"no such file or directory" 604 errmsg = b"no such file or directory"
603 else: 605 else:
604 raise 606 raise
607 if self._utf8_mode:
608 opath = walk.WalkDirEntry.alt_u8(path)
609 else:
610 opath = walk.WalkDirEntry.alt_fs(path)
605 self._writer.write_error(util.interpolate_bytes( 611 self._writer.write_error(util.interpolate_bytes(
606 b"`%s': %s", opath, errmsg)) 612 b"`%s': %s", opath, errmsg))
607 opath = join_output_path(top, None) 613 opath = join_output_path(top, None)
608 if opath: 614 if opath:
609 if self._utf8_mode: 615 if self._utf8_mode:
622 fsobjects.sort(key=walk.WalkDirEntry.sort_key_fs) 628 fsobjects.sort(key=walk.WalkDirEntry.sort_key_fs)
623 dir_dgst = self._algorithm[0]() 629 dir_dgst = self._algorithm[0]()
624 dir_size = 0 630 dir_size = 0
625 dir_tainted = False 631 dir_tainted = False
626 for fso in fsobjects: 632 for fso in fsobjects:
627 if fso.is_dir: 633 # Determine the effective name to be used for digesting
628 if fso.is_symlink and not self._follow_symlinks.directory: 634 if self._utf8_mode:
635 if fso.u8name is None:
636 dir_tainted = True
637 effective_fso_name = fso.alt_u8name
638 else:
639 effective_fso_name = fso.u8name
640 else:
641 if fso.fsname is None:
642 dir_tainted = True
643 effective_fso_name = fso.alt_fsname
644 else:
645 effective_fso_name = fso.fsname
646 # Determine the path (mostly its prefix) that is to be printed
647 opath = join_output_path(top, fso.name)
648 if self._utf8_mode:
649 opath = walk.WalkDirEntry.alt_u8(opath)
650 else:
651 opath = walk.WalkDirEntry.alt_fs(opath)
652 if fso.is_special:
653 # Determine the tag character
654 if fso.is_chr:
655 special_tag = b':'
656 elif fso.is_blk:
657 special_tag = b';'
658 elif fso.is_fifo:
659 special_tag = b'|'
660 elif fso.is_socket:
661 special_tag = b'='
662 elif fso.is_door:
663 special_tag = b'>'
664 elif fso.is_whiteout:
665 special_tag = b'%'
666 elif fso.is_eventport:
667 special_tag = b'+'
668 else:
669 assert False, "unknown special filesystem object"
670 assert fso.stat is not None # because .is_special is True
671 if fso.is_symlink and not self._follow_symlinks.file:
629 linktgt = walk.WalkDirEntry.from_readlink( 672 linktgt = walk.WalkDirEntry.from_readlink(
630 os.readlink(fso.path)) 673 os.readlink(fso.path))
631 # linktgt = util.fsencode(os.readlink(fso.path)))
632 linkdgst = self._algorithm[0]() 674 linkdgst = self._algorithm[0]()
633 if self._utf8_mode: 675 if self._utf8_mode:
634 if linktgt.u8path is None: 676 if linktgt.u8path is None:
635 dir_tainted = True 677 dir_tainted = True
636 linkdgst.update(linktgt.alt_u8path) 678 linkdgst.update(linktgt.alt_u8path)
637 else: 679 else:
638 linkdgst.update(linktgt.u8path) 680 linkdgst.update(linktgt.u8path)
639 if fso.u8name is None:
640 dir_tainted = True
641 dir_dgst.update(util.interpolate_bytes(
642 b"2:@/,%d:%s,",
643 len(fso.alt_u8name),
644 fso.alt_u8name))
645 else:
646 dir_dgst.update(util.interpolate_bytes(
647 b"2:@/,%d:%s,", len(fso.u8name), fso.u8name))
648 else: 681 else:
649 if linktgt.fspath is None: 682 if linktgt.fspath is None:
650 dir_tainted = True 683 dir_tainted = True
651 linkdgst.update(linktgt.alt_fspath) 684 linkdgst.update(linktgt.alt_fspath)
652 else: 685 else:
653 linkdgst.update(linktgt.fspath) 686 linkdgst.update(linktgt.fspath)
654 if fso.fsname is None: 687 dir_dgst.update(util.interpolate_bytes(
688 b"2:@%s,%d:%s,",
689 special_tag,
690 len(effective_fso_name),
691 effective_fso_name))
692 dir_dgst.update(util.interpolate_bytes(
693 b"%d:%s,%d:%s,",
694 len(self._algorithm[1]), util.b(self._algorithm[1]),
695 len(linkdgst.digest()), linkdgst.digest()))
696 #
697 # - no mtime and no mode for symlinks
698 # - also does not count for dir_size
699 #
700 if self._size_only:
701 self._writer.write_size(
702 util.interpolate_bytes(
703 b"%s/./@%s", opath, special_tag),
704 b"")
705 else:
706 sz = b"" if self._print_size else None
707 self._writer.write_file_digest(
708 self._algorithm[1],
709 util.interpolate_bytes(
710 b"%s/./@%s", opath, special_tag),
711 linkdgst.digest(),
712 self._use_base64,
713 size=sz)
714 else:
715 #
716 # Follow the symlink to special file and/or handle a
717 # special file
718 #
719 dir_dgst.update(util.interpolate_bytes(
720 b"1:%s,%d:%s,",
721 special_tag,
722 len(effective_fso_name),
723 effective_fso_name))
724 # no important size here but a mode
725 if self._with_metadata_mtime:
726 mtime = datetime.datetime.utcfromtimestamp(
727 int(fso.stat.st_mtime))
728 mtime = util.b(mtime.isoformat("T") + "Z")
729 dir_dgst.update(util.interpolate_bytes(
730 b"5:mtime,%d:%s,", len(mtime), mtime))
731 if self._with_metadata_full_mode:
732 modestr = util.b(
733 normalized_mode_str(fso.stat.st_mode))
734 dir_dgst.update(util.interpolate_bytes(
735 b"8:fullmode,%d:%s,", len(modestr), modestr))
736 elif self._with_metadata_mode:
737 modestr = util.b(normalized_compatible_mode_str(
738 fso.stat.st_mode))
739 dir_dgst.update(util.interpolate_bytes(
740 b"4:mode,%d:%s,", len(modestr), modestr))
741 if self._size_only:
742 self._writer.write_size(
743 util.interpolate_bytes(
744 b"%s/./%s", opath, special_tag),
745 b"")
746 else:
747 sz = b"" if self._print_size else None
748 self._writer.write_file_digest(
749 self._algorithm[1],
750 util.interpolate_bytes(
751 b"%s/./%s", opath, special_tag),
752 b"",
753 self._use_base64,
754 size=sz)
755 elif fso.is_dir:
756 assert fso.stat is not None # because .is_dir is True
757 if fso.is_symlink and not self._follow_symlinks.directory:
758 linktgt = walk.WalkDirEntry.from_readlink(
759 os.readlink(fso.path))
760 linkdgst = self._algorithm[0]()
761 if self._utf8_mode:
762 if linktgt.u8path is None:
655 dir_tainted = True 763 dir_tainted = True
656 dir_dgst.update(util.interpolate_bytes( 764 linkdgst.update(linktgt.alt_u8path)
657 b"2:@/,%d:%s,",
658 len(fso.alt_fsname),
659 fso.alt_fsname))
660 else: 765 else:
661 dir_dgst.update(util.interpolate_bytes( 766 linkdgst.update(linktgt.u8path)
662 b"2:@/,%d:%s,", len(fso.fsname), fso.fsname)) 767 else:
768 if linktgt.fspath is None:
769 dir_tainted = True
770 linkdgst.update(linktgt.alt_fspath)
771 else:
772 linkdgst.update(linktgt.fspath)
773 dir_dgst.update(util.interpolate_bytes(
774 b"2:@/,%d:%s,",
775 len(effective_fso_name),
776 effective_fso_name))
663 # 777 #
664 # - no mtime and no mode for symlinks 778 # - no mtime and no mode for symlinks
665 # - also does not count for dir_size 779 # - also does not count for dir_size
666 # 780 #
667 dir_dgst.update(util.interpolate_bytes( 781 dir_dgst.update(util.interpolate_bytes(
668 b"%d:%s,%d:%s,", 782 b"%d:%s,%d:%s,",
669 len(self._algorithm[1]), util.b(self._algorithm[1]), 783 len(self._algorithm[1]), util.b(self._algorithm[1]),
670 len(linkdgst.digest()), linkdgst.digest())) 784 len(linkdgst.digest()), linkdgst.digest()))
671 opath = join_output_path(top, fso.name)
672 if self._utf8_mode:
673 opath = walk.WalkDirEntry.alt_u8(opath)
674 else:
675 opath = walk.WalkDirEntry.alt_fs(opath)
676 if self._size_only: 785 if self._size_only:
677 self._writer.write_size( 786 self._writer.write_size(
678 util.interpolate_bytes(b"%s/./@/", opath), 787 util.interpolate_bytes(b"%s/./@/", opath),
679 0) 788 b"")
680 else: 789 else:
790 sz = b"" if self._print_size else None
681 self._writer.write_file_digest( 791 self._writer.write_file_digest(
682 self._algorithm[1], 792 self._algorithm[1],
683 util.interpolate_bytes(b"%s/./@/", opath), 793 util.interpolate_bytes(b"%s/./@/", opath),
684 linkdgst.digest(), 794 linkdgst.digest(),
685 self._use_base64) 795 self._use_base64)
701 # handled already 811 # handled already
702 # 812 #
703 assert False 813 assert False
704 814
705 dir_size += sub_dir_size 815 dir_size += sub_dir_size
706 if self._utf8_mode: 816 dir_dgst.update(util.interpolate_bytes(
707 if fso.u8name is None: 817 b"1:/,%d:%s,",
708 dir_tainted = True 818 len(effective_fso_name),
709 dir_dgst.update(util.interpolate_bytes( 819 effective_fso_name))
710 b"1:/,%d:%s,",
711 len(fso.alt_u8name),
712 fso.alt_u8name))
713 else:
714 dir_dgst.update(util.interpolate_bytes(
715 b"1:/,%d:%s,", len(fso.u8name), fso.u8name))
716 else:
717 if fso.fsname is None:
718 dir_tainted = True
719 dir_dgst.update(util.interpolate_bytes(
720 b"1:/,%d:%s,",
721 len(fso.alt_fsname),
722 fso.alt_fsname))
723 else:
724 dir_dgst.update(util.interpolate_bytes(
725 b"1:/,%d:%s,", len(fso.fsname), fso.fsname))
726 if self._with_metadata_full_mode: 820 if self._with_metadata_full_mode:
727 modestr = util.b(normalized_mode_str(fso.stat.st_mode)) 821 modestr = util.b(normalized_mode_str(fso.stat.st_mode))
728 dir_dgst.update(util.interpolate_bytes( 822 dir_dgst.update(util.interpolate_bytes(
729 b"8:fullmode,%d:%s,", len(modestr), modestr)) 823 b"8:fullmode,%d:%s,", len(modestr), modestr))
730 elif self._with_metadata_mode: 824 elif self._with_metadata_mode:
738 len(sub_dir_dgst), sub_dir_dgst)) 832 len(sub_dir_dgst), sub_dir_dgst))
739 else: 833 else:
740 if fso.is_symlink and not self._follow_symlinks.file: 834 if fso.is_symlink and not self._follow_symlinks.file:
741 # 835 #
742 # Symbolic link to some filesystem object which is not 836 # Symbolic link to some filesystem object which is not
743 # determined to be a link to a directory. 837 # determined to be a link to a directory or some other
838 # special file (socket, FIFO, et al.).
744 # 839 #
745 linktgt = walk.WalkDirEntry.from_readlink( 840 linktgt = walk.WalkDirEntry.from_readlink(
746 os.readlink(fso.path)) 841 os.readlink(fso.path))
747 # linktgt = util.fsencode(os.readlink(fso.path)))
748 linkdgst = self._algorithm[0]() 842 linkdgst = self._algorithm[0]()
749 if self._utf8_mode: 843 if self._utf8_mode:
750 if linktgt.u8path is None: 844 if linktgt.u8path is None:
751 dir_tainted = True 845 dir_tainted = True
752 linkdgst.update(linktgt.alt_u8path) 846 linkdgst.update(linktgt.alt_u8path)
753 else: 847 else:
754 linkdgst.update(linktgt.u8path) 848 linkdgst.update(linktgt.u8path)
755 if fso.u8name is None:
756 dir_tainted = True
757 dir_dgst.update(util.interpolate_bytes(
758 b"1:@,%d:%s,",
759 len(fso.alt_u8name),
760 fso.alt_u8name))
761 else:
762 dir_dgst.update(util.interpolate_bytes(
763 b"1:@,%d:%s,", len(fso.u8name), fso.u8name))
764 else: 849 else:
765 if linktgt.fspath is None: 850 if linktgt.fspath is None:
766 dir_tainted = True 851 dir_tainted = True
767 linkdgst.update(linktgt.alt_fspath) 852 linkdgst.update(linktgt.alt_fspath)
768 else: 853 else:
769 linkdgst.update(linktgt.fspath) 854 linkdgst.update(linktgt.fspath)
770 if fso.fsname is None: 855 dir_dgst.update(util.interpolate_bytes(
771 dir_tainted = True 856 b"1:@,%d:%s,",
772 dir_dgst.update(util.interpolate_bytes( 857 len(effective_fso_name),
773 b"1:@,%d:%s,", 858 effective_fso_name))
774 len(fso.alt_fsname),
775 fso.alt_fsname))
776 else:
777 dir_dgst.update(util.interpolate_bytes(
778 b"1:@,%d:%s,", len(fso.fsname), fso.fsname))
779 # 859 #
780 # - no mtime and no mode for symlinks 860 # - no mtime and no mode for symlinks
781 # - also does not count for dir_size 861 # - also does not count for dir_size
782 # 862 #
783 dir_dgst.update(util.interpolate_bytes( 863 dir_dgst.update(util.interpolate_bytes(
784 b"%d:%s,%d:%s,", 864 b"%d:%s,%d:%s,",
785 len(self._algorithm[1]), util.b(self._algorithm[1]), 865 len(self._algorithm[1]), util.b(self._algorithm[1]),
786 len(linkdgst.digest()), linkdgst.digest())) 866 len(linkdgst.digest()), linkdgst.digest()))
787 opath = join_output_path(top, fso.name)
788 if self._utf8_mode:
789 opath = walk.WalkDirEntry.alt_u8(opath)
790 else:
791 opath = walk.WalkDirEntry.alt_fs(opath)
792 if self._size_only: 867 if self._size_only:
793 self._writer.write_size( 868 self._writer.write_size(
794 util.interpolate_bytes(b"%s/./@", opath), 869 util.interpolate_bytes(b"%s/./@", opath),
795 0) 870 b"")
796 else: 871 else:
872 sz = b"" if self._print_size else None
797 self._writer.write_file_digest( 873 self._writer.write_file_digest(
798 self._algorithm[1], 874 self._algorithm[1],
799 util.interpolate_bytes(b"%s/./@", opath), 875 util.interpolate_bytes(b"%s/./@", opath),
800 linkdgst.digest(), 876 linkdgst.digest(),
801 self._use_base64) 877 self._use_base64,
878 size=sz)
802 else: 879 else:
803 # 880 #
804 # Follow the symlink to file or handle a "real" file 881 # Follow the symlink to file or handle a "real" file
805 # 882 #
806 883
807 if self._utf8_mode: 884 dir_dgst.update(util.interpolate_bytes(
808 if fso.u8name is None: 885 b"0:,%d:%s,",
809 dir_tainted = True 886 len(effective_fso_name),
810 dir_dgst.update(util.interpolate_bytes( 887 effective_fso_name))
811 b"0:,%d:%s,",
812 len(fso.alt_u8name),
813 fso.alt_u8name))
814 else:
815 dir_dgst.update(util.interpolate_bytes(
816 b"0:,%d:%s,", len(fso.u8name), fso.u8name))
817 else:
818 if fso.fsname is None:
819 dir_tainted = True
820 dir_dgst.update(util.interpolate_bytes(
821 b"0:,%d:%s,",
822 len(fso.alt_fsname),
823 fso.alt_fsname))
824 else:
825 dir_dgst.update(util.interpolate_bytes(
826 b"0:,%d:%s,", len(fso.fsname), fso.fsname))
827 opath = join_output_path(top, fso.name)
828 if self._utf8_mode:
829 opath = walk.WalkDirEntry.alt_u8(opath)
830 else:
831 opath = walk.WalkDirEntry.alt_fs(opath)
832 if fso.stat is None: 888 if fso.stat is None:
833 # 889 #
834 # Error: most likely a broken symlink here 890 # Error: most likely a broken symlink here
835 # 891 #
836 dir_tainted = True 892 dir_tainted = True
1146 1202
1147 PATTERN0 = re.compile(br"\A[ \t]*\r?\n\Z") # empty lines 1203 PATTERN0 = re.compile(br"\A[ \t]*\r?\n\Z") # empty lines
1148 PATTERN1 = re.compile(br"\A(VERSION|FSENCODING|FLAGS|TIMESTAMP|ISOTIMESTAMP|CRC32)[ \t]*=[ \t]*([^ \t]+)[ \t]*\r?\n\Z") # noqa: E501 line too long 1204 PATTERN1 = re.compile(br"\A(VERSION|FSENCODING|FLAGS|TIMESTAMP|ISOTIMESTAMP|CRC32)[ \t]*=[ \t]*([^ \t]+)[ \t]*\r?\n\Z") # noqa: E501 line too long
1149 PATTERN2 = re.compile(br"\A(ROOT|COMMENT|ERROR|GENERATOR)[ \t]*\((.*)\)[ \t]*\r?\n\Z") # noqa: E501 line too long 1205 PATTERN2 = re.compile(br"\A(ROOT|COMMENT|ERROR|GENERATOR)[ \t]*\((.*)\)[ \t]*\r?\n\Z") # noqa: E501 line too long
1150 PATTERN3 = re.compile(br"\ASIZE[ \t]*\((.*)\)([ \t]*=[ \t]*(\d+))?[ \t]*\r?\n\Z") # noqa: E501 line too long 1206 PATTERN3 = re.compile(br"\ASIZE[ \t]*\((.*)\)([ \t]*=[ \t]*(\d+))?[ \t]*\r?\n\Z") # noqa: E501 line too long
1151 PATTERN4 = re.compile(br"\A([A-Za-z0-9_-]+)[ \t]*\((.*)\)([ \t]*=[ \t]*([A-Za-z0-9=+/]+)(,(\d+))?)?[ \t]*\r?\n\Z") # noqa: E501 line too long 1207 PATTERN4 = re.compile(br"\A([A-Za-z0-9_-]+)[ \t]*\((.*)\)([ \t]*=[ \t]*([A-Za-z0-9=+/]+)?(,(\d+)?)?)?[ \t]*\r?\n\Z") # noqa: E501 line too long
1152 1208
1153 def __init__(self, _fp, _filename, _own_fp): 1209 def __init__(self, _fp, _filename, _own_fp):
1154 self._fp = _fp 1210 self._fp = _fp
1155 self._own_fp = _own_fp 1211 self._own_fp = _own_fp
1156 self._filename = _filename 1212 self._filename = _filename
1291 mo = self.PATTERN4.search(line) 1347 mo = self.PATTERN4.search(line)
1292 if mo: 1348 if mo:
1293 self._update_crc(line) 1349 self._update_crc(line)
1294 algo_name = util.n(mo.group(1)) 1350 algo_name = util.n(mo.group(1))
1295 if mo.group(3): 1351 if mo.group(3):
1296 if (len(mo.group(4)) == 1352 if mo.group(4):
1297 2 * self._get_digest_size(algo_name)): 1353 if (len(mo.group(4)) ==
1298 # hex 1354 2 * self._get_digest_size(algo_name)):
1299 digest = binascii.unhexlify(mo.group(4)) 1355 # hex
1356 digest = binascii.unhexlify(mo.group(4))
1357 else:
1358 # base64
1359 digest = base64.b64decode(mo.group(4))
1300 else: 1360 else:
1301 # base64 1361 digest = None
1302 digest = base64.b64decode(mo.group(4))
1303 if mo.group(5): 1362 if mo.group(5):
1304 size = int(util.n(mo.group(6)), 10) 1363 if mo.group(6):
1364 size = int(util.n(mo.group(6)), 10)
1365 else:
1366 size = None
1305 else: 1367 else:
1306 size = None 1368 size = None
1307 return (algo_name, mo.group(2), digest, size) 1369 return (algo_name, mo.group(2), digest, size)
1308 else: 1370 else:
1309 return (algo_name, mo.group(2), None, None) 1371 return (algo_name, mo.group(2), None, None)