comparison cutils/treesum.py @ 270:42f4ca423ab3

treesum: REFACTOR: Major refactoring of computing digests: - Digests that are created by reading symlink targets are created as if the result of readlink() is the content of a regular file: no special tagging is done now. - All digest values have their canonical algorithm name prepended. - Normalize the order of computation
author Franz Glasner <fzglas.hg@dom66.de>
date Tue, 18 Feb 2025 16:07:28 +0100
parents fc002983253c
children 6fe88de236cb
comparison
equal deleted inserted replaced
269:fc002983253c 270:42f4ca423ab3
549 self._writer.flush() 549 self._writer.flush()
550 550
551 if not self._follow_symlinks.command_line and os.path.islink(root): 551 if not self._follow_symlinks.command_line and os.path.islink(root):
552 linktgt = walk.WalkDirEntry.from_readlink(os.readlink(root)) 552 linktgt = walk.WalkDirEntry.from_readlink(os.readlink(root))
553 linkdgst = self._algorithm[0]() 553 linkdgst = self._algorithm[0]()
554 linkdgst.update( 554 linkdgst.update(linktgt.fspath)
555 util.interpolate_bytes(
556 b"%d:%s,", len(linktgt.fspath), linktgt.fspath))
557 dir_dgst = self._algorithm[0]() 555 dir_dgst = self._algorithm[0]()
558 dir_dgst.update(b"1:L,") 556 dir_dgst.update(b"2:L@,")
559 dir_dgst.update( 557 dir_dgst.update(
560 util.interpolate_bytes( 558 util.interpolate_bytes(
561 b"%d:%s,", len(linkdgst.digest()), linkdgst.digest())) 559 b"%d:%s,%d:%s,",
560 len(self._algorithm[1]), util.b(self._algorithm[1]),
561 len(linkdgst.digest()), linkdgst.digest()))
562 if self._size_only: 562 if self._size_only:
563 self._writer.write_size(b"./@/", 0) 563 self._writer.write_size(b"./@/", 0)
564 else: 564 else:
565 self._writer.write_file_digest( 565 self._writer.write_file_digest(
566 self._algorithm[1], 566 self._algorithm[1],
625 # linktgt = util.fsencode(os.readlink(fso.path))) 625 # linktgt = util.fsencode(os.readlink(fso.path)))
626 linkdgst = self._algorithm[0]() 626 linkdgst = self._algorithm[0]()
627 if self._utf8_mode: 627 if self._utf8_mode:
628 if linktgt.u8path is None: 628 if linktgt.u8path is None:
629 dir_tainted = True 629 dir_tainted = True
630 linkdgst.update(util.interpolate_bytes( 630 linkdgst.update(linktgt.alt_u8path)
631 b"%d:%s,", 631 else:
632 len(linktgt.alt_u8path), 632 linkdgst.update(linktgt.u8path)
633 linktgt.alt_u8path))
634 else:
635 linkdgst.update(util.interpolate_bytes(
636 b"%d:%s,",
637 len(linktgt.u8path),
638 linktgt.u8path))
639 if fso.u8name is None: 633 if fso.u8name is None:
640 dir_tainted = True 634 dir_tainted = True
641 dir_dgst.update(util.interpolate_bytes( 635 dir_dgst.update(util.interpolate_bytes(
642 b"1:S,%d:%s,", 636 b"2:@/,%d:%s,",
643 len(fso.alt_u8name), 637 len(fso.alt_u8name),
644 fso.alt_u8name)) 638 fso.alt_u8name))
645 else: 639 else:
646 dir_dgst.update(util.interpolate_bytes( 640 dir_dgst.update(util.interpolate_bytes(
647 b"1:S,%d:%s,", len(fso.u8name), fso.u8name)) 641 b"2:@/,%d:%s,", len(fso.u8name), fso.u8name))
648 else: 642 else:
649 if linktgt.fspath is None: 643 if linktgt.fspath is None:
650 dir_tainted = True 644 dir_tainted = True
651 linkdgst.update(util.interpolate_bytes( 645 linkdgst.update(linktgt.alt_fspath)
652 b"%d:%s,", 646 else:
653 len(linktgt.alt_fspath), 647 linkdgst.update(linktgt.fspath)
654 linktgt.alt_fspath))
655 else:
656 linkdgst.update(util.interpolate_bytes(
657 b"%d:%s,",
658 len(linktgt.fspath),
659 linktgt.fspath))
660 if fso.fsname is None: 648 if fso.fsname is None:
661 dir_tainted = True 649 dir_tainted = True
662 dir_dgst.update(util.interpolate_bytes( 650 dir_dgst.update(util.interpolate_bytes(
663 b"1:S,%d:%s,", 651 b"2:@/,%d:%s,",
664 len(fso.alt_fsname), 652 len(fso.alt_fsname),
665 fso.alt_fsname)) 653 fso.alt_fsname))
666 else: 654 else:
667 dir_dgst.update(util.interpolate_bytes( 655 dir_dgst.update(util.interpolate_bytes(
668 b"1:S,%d:%s,", len(fso.fsname), fso.fsname)) 656 b"2:@/,%d:%s,", len(fso.fsname), fso.fsname))
669 # 657 #
670 # - no mtime and no mode for symlinks 658 # - no mtime and no mode for symlinks
671 # - also does not count for dir_size 659 # - also does not count for dir_size
672 # 660 #
673 dir_dgst.update(util.interpolate_bytes( 661 dir_dgst.update(util.interpolate_bytes(
674 b"%d:%s,", 662 b"%d:%s,%d:%s,",
663 len(self._algorithm[1]), util.b(self._algorithm[1]),
675 len(linkdgst.digest()), linkdgst.digest())) 664 len(linkdgst.digest()), linkdgst.digest()))
676 opath = join_output_path(top, fso.name) 665 opath = join_output_path(top, fso.name)
677 if self._utf8_mode: 666 if self._utf8_mode:
678 opath = walk.WalkDirEntry.alt_u8(opath) 667 opath = walk.WalkDirEntry.alt_u8(opath)
679 else: 668 else:
710 dir_size += sub_dir_size 699 dir_size += sub_dir_size
711 if self._utf8_mode: 700 if self._utf8_mode:
712 if fso.u8name is None: 701 if fso.u8name is None:
713 dir_tainted = True 702 dir_tainted = True
714 dir_dgst.update(util.interpolate_bytes( 703 dir_dgst.update(util.interpolate_bytes(
715 b"1:d,%d:%s,", 704 b"1:/,%d:%s,",
716 len(fso.alt_u8name), 705 len(fso.alt_u8name),
717 fso.alt_u8name)) 706 fso.alt_u8name))
718 else: 707 else:
719 dir_dgst.update(util.interpolate_bytes( 708 dir_dgst.update(util.interpolate_bytes(
720 b"1:d,%d:%s,", len(fso.u8name), fso.u8name)) 709 b"1:/,%d:%s,", len(fso.u8name), fso.u8name))
721 else: 710 else:
722 if fso.fsname is None: 711 if fso.fsname is None:
723 dir_tainted = True 712 dir_tainted = True
724 dir_dgst.update(util.interpolate_bytes( 713 dir_dgst.update(util.interpolate_bytes(
725 b"1:d,%d:%s,", 714 b"1:/,%d:%s,",
726 len(fso.alt_fsname), 715 len(fso.alt_fsname),
727 fso.alt_fsname)) 716 fso.alt_fsname))
728 else: 717 else:
729 dir_dgst.update(util.interpolate_bytes( 718 dir_dgst.update(util.interpolate_bytes(
730 b"1:d,%d:%s,", len(fso.fsname), fso.fsname)) 719 b"1:/,%d:%s,", len(fso.fsname), fso.fsname))
731 dir_dgst.update(util.interpolate_bytes(
732 b"%d:%s,", len(sub_dir_dgst), sub_dir_dgst))
733 if self._with_metadata_full_mode: 720 if self._with_metadata_full_mode:
734 modestr = util.b(normalized_mode_str(fso.stat.st_mode)) 721 modestr = util.b(normalized_mode_str(fso.stat.st_mode))
735 dir_dgst.update(util.interpolate_bytes( 722 dir_dgst.update(util.interpolate_bytes(
736 b"8:fullmode,%d:%s,", len(modestr), modestr)) 723 b"8:fullmode,%d:%s,", len(modestr), modestr))
737 elif self._with_metadata_mode: 724 elif self._with_metadata_mode:
738 modestr = util.b(normalized_compatible_mode_str( 725 modestr = util.b(normalized_compatible_mode_str(
739 fso.stat.st_mode)) 726 fso.stat.st_mode))
740 dir_dgst.update(util.interpolate_bytes( 727 dir_dgst.update(util.interpolate_bytes(
741 b"4:mode,%d:%s,", len(modestr), modestr)) 728 b"4:mode,%d:%s,", len(modestr), modestr))
729 dir_dgst.update(util.interpolate_bytes(
730 b"%d:%s,%d:%s,",
731 len(sub_dir_algo), util.b(sub_dir_algo),
732 len(sub_dir_dgst), sub_dir_dgst))
742 else: 733 else:
743 if fso.is_symlink and not self._follow_symlinks.file: 734 if fso.is_symlink and not self._follow_symlinks.file:
735 #
736 # Symbolic link to some filesystem object which is not
737 # determined to be a link to a directory.
738 #
744 linktgt = walk.WalkDirEntry.from_readlink( 739 linktgt = walk.WalkDirEntry.from_readlink(
745 os.readlink(fso.path)) 740 os.readlink(fso.path))
746 # linktgt = util.fsencode(os.readlink(fso.path))) 741 # linktgt = util.fsencode(os.readlink(fso.path)))
747 linkdgst = self._algorithm[0]() 742 linkdgst = self._algorithm[0]()
748 if self._utf8_mode: 743 if self._utf8_mode:
749 if linktgt.u8path is None: 744 if linktgt.u8path is None:
750 dir_tainted = True 745 dir_tainted = True
751 linkdgst.update(util.interpolate_bytes( 746 linkdgst.update(linktgt.alt_u8path)
752 b"%d:%s,", 747 else:
753 len(linktgt.alt_u8path), 748 linkdgst.update(linktgt.u8path)
754 linktgt.alt_u8path))
755 else:
756 linkdgst.update(util.interpolate_bytes(
757 b"%d:%s,",
758 len(linktgt.u8path),
759 linktgt.u8path))
760 if fso.u8name is None: 749 if fso.u8name is None:
761 dir_tainted = True 750 dir_tainted = True
762 dir_dgst.update(util.interpolate_bytes( 751 dir_dgst.update(util.interpolate_bytes(
763 b"1:F,%d:%s,", 752 b"1:@,%d:%s,",
764 len(fso.alt_u8name), 753 len(fso.alt_u8name),
765 fso.alt_u8name)) 754 fso.alt_u8name))
766 else: 755 else:
767 dir_dgst.update(util.interpolate_bytes( 756 dir_dgst.update(util.interpolate_bytes(
768 b"1:F,%d:%s,", len(fso.u8name), fso.u8name)) 757 b"1:@,%d:%s,", len(fso.u8name), fso.u8name))
769 else: 758 else:
770 if linktgt.fspath is None: 759 if linktgt.fspath is None:
771 dir_tainted = True 760 dir_tainted = True
772 linkdgst.update(util.interpolate_bytes( 761 linkdgst.update(linktgt.alt_fspath)
773 b"%d:%s,", 762 else:
774 len(linktgt.alt_fspath), 763 linkdgst.update(linktgt.fspath)
775 linktgt.alt_fspath))
776 else:
777 linkdgst.update(util.interpolate_bytes(
778 b"%d:%s,",
779 len(linktgt.fspath),
780 linktgt.fspath))
781 if fso.fsname is None: 764 if fso.fsname is None:
782 dir_tainted = True 765 dir_tainted = True
783 dir_dgst.update(util.interpolate_bytes( 766 dir_dgst.update(util.interpolate_bytes(
784 b"1:F,%d:%s,", 767 b"1:@,%d:%s,",
785 len(fso.alt_fsname), 768 len(fso.alt_fsname),
786 fso.alt_fsname)) 769 fso.alt_fsname))
787 else: 770 else:
788 dir_dgst.update(util.interpolate_bytes( 771 dir_dgst.update(util.interpolate_bytes(
789 b"1:F,%d:%s,", len(fso.fsname), fso.fsname)) 772 b"1:@,%d:%s,", len(fso.fsname), fso.fsname))
790 # 773 #
791 # - no mtime and no mode for symlinks 774 # - no mtime and no mode for symlinks
792 # - also does not count for dir_size 775 # - also does not count for dir_size
793 # 776 #
794 dir_dgst.update(util.interpolate_bytes( 777 dir_dgst.update(util.interpolate_bytes(
795 b"%d:%s,", 778 b"%d:%s,%d:%s,",
779 len(self._algorithm[1]), util.b(self._algorithm[1]),
796 len(linkdgst.digest()), linkdgst.digest())) 780 len(linkdgst.digest()), linkdgst.digest()))
797 opath = join_output_path(top, fso.name) 781 opath = join_output_path(top, fso.name)
798 if self._utf8_mode: 782 if self._utf8_mode:
799 opath = walk.WalkDirEntry.alt_u8(opath) 783 opath = walk.WalkDirEntry.alt_u8(opath)
800 else: 784 else:
816 800
817 if self._utf8_mode: 801 if self._utf8_mode:
818 if fso.u8name is None: 802 if fso.u8name is None:
819 dir_tainted = True 803 dir_tainted = True
820 dir_dgst.update(util.interpolate_bytes( 804 dir_dgst.update(util.interpolate_bytes(
821 b"1:f,%d:%s,", 805 b"0:,%d:%s,",
822 len(fso.alt_u8name), 806 len(fso.alt_u8name),
823 fso.alt_u8name)) 807 fso.alt_u8name))
824 else: 808 else:
825 dir_dgst.update(util.interpolate_bytes( 809 dir_dgst.update(util.interpolate_bytes(
826 b"1:f,%d:%s,", len(fso.u8name), fso.u8name)) 810 b"0:,%d:%s,", len(fso.u8name), fso.u8name))
827 else: 811 else:
828 if fso.fsname is None: 812 if fso.fsname is None:
829 dir_tainted = True 813 dir_tainted = True
830 dir_dgst.update(util.interpolate_bytes( 814 dir_dgst.update(util.interpolate_bytes(
831 b"1:f,%d:%s,", 815 b"0:,%d:%s,",
832 len(fso.alt_fsname), 816 len(fso.alt_fsname),
833 fso.alt_fsname)) 817 fso.alt_fsname))
834 else: 818 else:
835 dir_dgst.update(util.interpolate_bytes( 819 dir_dgst.update(util.interpolate_bytes(
836 b"1:f,%d:%s,", len(fso.fsname), fso.fsname)) 820 b"0:,%d:%s,", len(fso.fsname), fso.fsname))
837 opath = join_output_path(top, fso.name) 821 opath = join_output_path(top, fso.name)
838 if self._utf8_mode: 822 if self._utf8_mode:
839 opath = walk.WalkDirEntry.alt_u8(opath) 823 opath = walk.WalkDirEntry.alt_u8(opath)
840 else: 824 else:
841 opath = walk.WalkDirEntry.alt_fs(opath) 825 opath = walk.WalkDirEntry.alt_fs(opath)
888 dgst = digest.compute_digest_file( 872 dgst = digest.compute_digest_file(
889 self._algorithm[0], 873 self._algorithm[0],
890 fso.path, 874 fso.path,
891 use_mmap=self._use_mmap) 875 use_mmap=self._use_mmap)
892 dir_dgst.update(util.interpolate_bytes( 876 dir_dgst.update(util.interpolate_bytes(
893 b"%d:%s,", len(dgst), dgst)) 877 b"%d:%s,%d:%s,",
878 len(self._algorithm[1]),
879 util.b(self._algorithm[1]),
880 len(dgst),
881 dgst))
894 if self._size_only: 882 if self._size_only:
895 self._writer.write_size(opath, fso.stat.st_size) 883 self._writer.write_size(opath, fso.stat.st_size)
896 else: 884 else:
897 sz = fso.stat.st_size if self._print_size else None 885 sz = fso.stat.st_size if self._print_size else None
898 self._writer.write_file_digest( 886 self._writer.write_file_digest(