Mercurial > hgrepos > Python > apps > py-cutils
comparison cutils/treesum.py @ 270:42f4ca423ab3
treesum: REFACTOR: Major refactoring of computing digests:
- Digests that are created by reading symlink targets are created as if the
result of readlink() is the content of a regular file: no special
tagging is done now.
- All digest values have their canonical algorithm name prepended.
- Normalize the order of computation
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Tue, 18 Feb 2025 16:07:28 +0100 |
| parents | fc002983253c |
| children | 6fe88de236cb |
comparison
equal
deleted
inserted
replaced
| 269:fc002983253c | 270:42f4ca423ab3 |
|---|---|
| 549 self._writer.flush() | 549 self._writer.flush() |
| 550 | 550 |
| 551 if not self._follow_symlinks.command_line and os.path.islink(root): | 551 if not self._follow_symlinks.command_line and os.path.islink(root): |
| 552 linktgt = walk.WalkDirEntry.from_readlink(os.readlink(root)) | 552 linktgt = walk.WalkDirEntry.from_readlink(os.readlink(root)) |
| 553 linkdgst = self._algorithm[0]() | 553 linkdgst = self._algorithm[0]() |
| 554 linkdgst.update( | 554 linkdgst.update(linktgt.fspath) |
| 555 util.interpolate_bytes( | |
| 556 b"%d:%s,", len(linktgt.fspath), linktgt.fspath)) | |
| 557 dir_dgst = self._algorithm[0]() | 555 dir_dgst = self._algorithm[0]() |
| 558 dir_dgst.update(b"1:L,") | 556 dir_dgst.update(b"2:L@,") |
| 559 dir_dgst.update( | 557 dir_dgst.update( |
| 560 util.interpolate_bytes( | 558 util.interpolate_bytes( |
| 561 b"%d:%s,", len(linkdgst.digest()), linkdgst.digest())) | 559 b"%d:%s,%d:%s,", |
| 560 len(self._algorithm[1]), util.b(self._algorithm[1]), | |
| 561 len(linkdgst.digest()), linkdgst.digest())) | |
| 562 if self._size_only: | 562 if self._size_only: |
| 563 self._writer.write_size(b"./@/", 0) | 563 self._writer.write_size(b"./@/", 0) |
| 564 else: | 564 else: |
| 565 self._writer.write_file_digest( | 565 self._writer.write_file_digest( |
| 566 self._algorithm[1], | 566 self._algorithm[1], |
| 625 # linktgt = util.fsencode(os.readlink(fso.path))) | 625 # linktgt = util.fsencode(os.readlink(fso.path))) |
| 626 linkdgst = self._algorithm[0]() | 626 linkdgst = self._algorithm[0]() |
| 627 if self._utf8_mode: | 627 if self._utf8_mode: |
| 628 if linktgt.u8path is None: | 628 if linktgt.u8path is None: |
| 629 dir_tainted = True | 629 dir_tainted = True |
| 630 linkdgst.update(util.interpolate_bytes( | 630 linkdgst.update(linktgt.alt_u8path) |
| 631 b"%d:%s,", | 631 else: |
| 632 len(linktgt.alt_u8path), | 632 linkdgst.update(linktgt.u8path) |
| 633 linktgt.alt_u8path)) | |
| 634 else: | |
| 635 linkdgst.update(util.interpolate_bytes( | |
| 636 b"%d:%s,", | |
| 637 len(linktgt.u8path), | |
| 638 linktgt.u8path)) | |
| 639 if fso.u8name is None: | 633 if fso.u8name is None: |
| 640 dir_tainted = True | 634 dir_tainted = True |
| 641 dir_dgst.update(util.interpolate_bytes( | 635 dir_dgst.update(util.interpolate_bytes( |
| 642 b"1:S,%d:%s,", | 636 b"2:@/,%d:%s,", |
| 643 len(fso.alt_u8name), | 637 len(fso.alt_u8name), |
| 644 fso.alt_u8name)) | 638 fso.alt_u8name)) |
| 645 else: | 639 else: |
| 646 dir_dgst.update(util.interpolate_bytes( | 640 dir_dgst.update(util.interpolate_bytes( |
| 647 b"1:S,%d:%s,", len(fso.u8name), fso.u8name)) | 641 b"2:@/,%d:%s,", len(fso.u8name), fso.u8name)) |
| 648 else: | 642 else: |
| 649 if linktgt.fspath is None: | 643 if linktgt.fspath is None: |
| 650 dir_tainted = True | 644 dir_tainted = True |
| 651 linkdgst.update(util.interpolate_bytes( | 645 linkdgst.update(linktgt.alt_fspath) |
| 652 b"%d:%s,", | 646 else: |
| 653 len(linktgt.alt_fspath), | 647 linkdgst.update(linktgt.fspath) |
| 654 linktgt.alt_fspath)) | |
| 655 else: | |
| 656 linkdgst.update(util.interpolate_bytes( | |
| 657 b"%d:%s,", | |
| 658 len(linktgt.fspath), | |
| 659 linktgt.fspath)) | |
| 660 if fso.fsname is None: | 648 if fso.fsname is None: |
| 661 dir_tainted = True | 649 dir_tainted = True |
| 662 dir_dgst.update(util.interpolate_bytes( | 650 dir_dgst.update(util.interpolate_bytes( |
| 663 b"1:S,%d:%s,", | 651 b"2:@/,%d:%s,", |
| 664 len(fso.alt_fsname), | 652 len(fso.alt_fsname), |
| 665 fso.alt_fsname)) | 653 fso.alt_fsname)) |
| 666 else: | 654 else: |
| 667 dir_dgst.update(util.interpolate_bytes( | 655 dir_dgst.update(util.interpolate_bytes( |
| 668 b"1:S,%d:%s,", len(fso.fsname), fso.fsname)) | 656 b"2:@/,%d:%s,", len(fso.fsname), fso.fsname)) |
| 669 # | 657 # |
| 670 # - no mtime and no mode for symlinks | 658 # - no mtime and no mode for symlinks |
| 671 # - also does not count for dir_size | 659 # - also does not count for dir_size |
| 672 # | 660 # |
| 673 dir_dgst.update(util.interpolate_bytes( | 661 dir_dgst.update(util.interpolate_bytes( |
| 674 b"%d:%s,", | 662 b"%d:%s,%d:%s,", |
| 663 len(self._algorithm[1]), util.b(self._algorithm[1]), | |
| 675 len(linkdgst.digest()), linkdgst.digest())) | 664 len(linkdgst.digest()), linkdgst.digest())) |
| 676 opath = join_output_path(top, fso.name) | 665 opath = join_output_path(top, fso.name) |
| 677 if self._utf8_mode: | 666 if self._utf8_mode: |
| 678 opath = walk.WalkDirEntry.alt_u8(opath) | 667 opath = walk.WalkDirEntry.alt_u8(opath) |
| 679 else: | 668 else: |
| 710 dir_size += sub_dir_size | 699 dir_size += sub_dir_size |
| 711 if self._utf8_mode: | 700 if self._utf8_mode: |
| 712 if fso.u8name is None: | 701 if fso.u8name is None: |
| 713 dir_tainted = True | 702 dir_tainted = True |
| 714 dir_dgst.update(util.interpolate_bytes( | 703 dir_dgst.update(util.interpolate_bytes( |
| 715 b"1:d,%d:%s,", | 704 b"1:/,%d:%s,", |
| 716 len(fso.alt_u8name), | 705 len(fso.alt_u8name), |
| 717 fso.alt_u8name)) | 706 fso.alt_u8name)) |
| 718 else: | 707 else: |
| 719 dir_dgst.update(util.interpolate_bytes( | 708 dir_dgst.update(util.interpolate_bytes( |
| 720 b"1:d,%d:%s,", len(fso.u8name), fso.u8name)) | 709 b"1:/,%d:%s,", len(fso.u8name), fso.u8name)) |
| 721 else: | 710 else: |
| 722 if fso.fsname is None: | 711 if fso.fsname is None: |
| 723 dir_tainted = True | 712 dir_tainted = True |
| 724 dir_dgst.update(util.interpolate_bytes( | 713 dir_dgst.update(util.interpolate_bytes( |
| 725 b"1:d,%d:%s,", | 714 b"1:/,%d:%s,", |
| 726 len(fso.alt_fsname), | 715 len(fso.alt_fsname), |
| 727 fso.alt_fsname)) | 716 fso.alt_fsname)) |
| 728 else: | 717 else: |
| 729 dir_dgst.update(util.interpolate_bytes( | 718 dir_dgst.update(util.interpolate_bytes( |
| 730 b"1:d,%d:%s,", len(fso.fsname), fso.fsname)) | 719 b"1:/,%d:%s,", len(fso.fsname), fso.fsname)) |
| 731 dir_dgst.update(util.interpolate_bytes( | |
| 732 b"%d:%s,", len(sub_dir_dgst), sub_dir_dgst)) | |
| 733 if self._with_metadata_full_mode: | 720 if self._with_metadata_full_mode: |
| 734 modestr = util.b(normalized_mode_str(fso.stat.st_mode)) | 721 modestr = util.b(normalized_mode_str(fso.stat.st_mode)) |
| 735 dir_dgst.update(util.interpolate_bytes( | 722 dir_dgst.update(util.interpolate_bytes( |
| 736 b"8:fullmode,%d:%s,", len(modestr), modestr)) | 723 b"8:fullmode,%d:%s,", len(modestr), modestr)) |
| 737 elif self._with_metadata_mode: | 724 elif self._with_metadata_mode: |
| 738 modestr = util.b(normalized_compatible_mode_str( | 725 modestr = util.b(normalized_compatible_mode_str( |
| 739 fso.stat.st_mode)) | 726 fso.stat.st_mode)) |
| 740 dir_dgst.update(util.interpolate_bytes( | 727 dir_dgst.update(util.interpolate_bytes( |
| 741 b"4:mode,%d:%s,", len(modestr), modestr)) | 728 b"4:mode,%d:%s,", len(modestr), modestr)) |
| 729 dir_dgst.update(util.interpolate_bytes( | |
| 730 b"%d:%s,%d:%s,", | |
| 731 len(sub_dir_algo), util.b(sub_dir_algo), | |
| 732 len(sub_dir_dgst), sub_dir_dgst)) | |
| 742 else: | 733 else: |
| 743 if fso.is_symlink and not self._follow_symlinks.file: | 734 if fso.is_symlink and not self._follow_symlinks.file: |
| 735 # | |
| 736 # Symbolic link to some filesystem object which is not | |
| 737 # determined to be a link to a directory. | |
| 738 # | |
| 744 linktgt = walk.WalkDirEntry.from_readlink( | 739 linktgt = walk.WalkDirEntry.from_readlink( |
| 745 os.readlink(fso.path)) | 740 os.readlink(fso.path)) |
| 746 # linktgt = util.fsencode(os.readlink(fso.path))) | 741 # linktgt = util.fsencode(os.readlink(fso.path))) |
| 747 linkdgst = self._algorithm[0]() | 742 linkdgst = self._algorithm[0]() |
| 748 if self._utf8_mode: | 743 if self._utf8_mode: |
| 749 if linktgt.u8path is None: | 744 if linktgt.u8path is None: |
| 750 dir_tainted = True | 745 dir_tainted = True |
| 751 linkdgst.update(util.interpolate_bytes( | 746 linkdgst.update(linktgt.alt_u8path) |
| 752 b"%d:%s,", | 747 else: |
| 753 len(linktgt.alt_u8path), | 748 linkdgst.update(linktgt.u8path) |
| 754 linktgt.alt_u8path)) | |
| 755 else: | |
| 756 linkdgst.update(util.interpolate_bytes( | |
| 757 b"%d:%s,", | |
| 758 len(linktgt.u8path), | |
| 759 linktgt.u8path)) | |
| 760 if fso.u8name is None: | 749 if fso.u8name is None: |
| 761 dir_tainted = True | 750 dir_tainted = True |
| 762 dir_dgst.update(util.interpolate_bytes( | 751 dir_dgst.update(util.interpolate_bytes( |
| 763 b"1:F,%d:%s,", | 752 b"1:@,%d:%s,", |
| 764 len(fso.alt_u8name), | 753 len(fso.alt_u8name), |
| 765 fso.alt_u8name)) | 754 fso.alt_u8name)) |
| 766 else: | 755 else: |
| 767 dir_dgst.update(util.interpolate_bytes( | 756 dir_dgst.update(util.interpolate_bytes( |
| 768 b"1:F,%d:%s,", len(fso.u8name), fso.u8name)) | 757 b"1:@,%d:%s,", len(fso.u8name), fso.u8name)) |
| 769 else: | 758 else: |
| 770 if linktgt.fspath is None: | 759 if linktgt.fspath is None: |
| 771 dir_tainted = True | 760 dir_tainted = True |
| 772 linkdgst.update(util.interpolate_bytes( | 761 linkdgst.update(linktgt.alt_fspath) |
| 773 b"%d:%s,", | 762 else: |
| 774 len(linktgt.alt_fspath), | 763 linkdgst.update(linktgt.fspath) |
| 775 linktgt.alt_fspath)) | |
| 776 else: | |
| 777 linkdgst.update(util.interpolate_bytes( | |
| 778 b"%d:%s,", | |
| 779 len(linktgt.fspath), | |
| 780 linktgt.fspath)) | |
| 781 if fso.fsname is None: | 764 if fso.fsname is None: |
| 782 dir_tainted = True | 765 dir_tainted = True |
| 783 dir_dgst.update(util.interpolate_bytes( | 766 dir_dgst.update(util.interpolate_bytes( |
| 784 b"1:F,%d:%s,", | 767 b"1:@,%d:%s,", |
| 785 len(fso.alt_fsname), | 768 len(fso.alt_fsname), |
| 786 fso.alt_fsname)) | 769 fso.alt_fsname)) |
| 787 else: | 770 else: |
| 788 dir_dgst.update(util.interpolate_bytes( | 771 dir_dgst.update(util.interpolate_bytes( |
| 789 b"1:F,%d:%s,", len(fso.fsname), fso.fsname)) | 772 b"1:@,%d:%s,", len(fso.fsname), fso.fsname)) |
| 790 # | 773 # |
| 791 # - no mtime and no mode for symlinks | 774 # - no mtime and no mode for symlinks |
| 792 # - also does not count for dir_size | 775 # - also does not count for dir_size |
| 793 # | 776 # |
| 794 dir_dgst.update(util.interpolate_bytes( | 777 dir_dgst.update(util.interpolate_bytes( |
| 795 b"%d:%s,", | 778 b"%d:%s,%d:%s,", |
| 779 len(self._algorithm[1]), util.b(self._algorithm[1]), | |
| 796 len(linkdgst.digest()), linkdgst.digest())) | 780 len(linkdgst.digest()), linkdgst.digest())) |
| 797 opath = join_output_path(top, fso.name) | 781 opath = join_output_path(top, fso.name) |
| 798 if self._utf8_mode: | 782 if self._utf8_mode: |
| 799 opath = walk.WalkDirEntry.alt_u8(opath) | 783 opath = walk.WalkDirEntry.alt_u8(opath) |
| 800 else: | 784 else: |
| 816 | 800 |
| 817 if self._utf8_mode: | 801 if self._utf8_mode: |
| 818 if fso.u8name is None: | 802 if fso.u8name is None: |
| 819 dir_tainted = True | 803 dir_tainted = True |
| 820 dir_dgst.update(util.interpolate_bytes( | 804 dir_dgst.update(util.interpolate_bytes( |
| 821 b"1:f,%d:%s,", | 805 b"0:,%d:%s,", |
| 822 len(fso.alt_u8name), | 806 len(fso.alt_u8name), |
| 823 fso.alt_u8name)) | 807 fso.alt_u8name)) |
| 824 else: | 808 else: |
| 825 dir_dgst.update(util.interpolate_bytes( | 809 dir_dgst.update(util.interpolate_bytes( |
| 826 b"1:f,%d:%s,", len(fso.u8name), fso.u8name)) | 810 b"0:,%d:%s,", len(fso.u8name), fso.u8name)) |
| 827 else: | 811 else: |
| 828 if fso.fsname is None: | 812 if fso.fsname is None: |
| 829 dir_tainted = True | 813 dir_tainted = True |
| 830 dir_dgst.update(util.interpolate_bytes( | 814 dir_dgst.update(util.interpolate_bytes( |
| 831 b"1:f,%d:%s,", | 815 b"0:,%d:%s,", |
| 832 len(fso.alt_fsname), | 816 len(fso.alt_fsname), |
| 833 fso.alt_fsname)) | 817 fso.alt_fsname)) |
| 834 else: | 818 else: |
| 835 dir_dgst.update(util.interpolate_bytes( | 819 dir_dgst.update(util.interpolate_bytes( |
| 836 b"1:f,%d:%s,", len(fso.fsname), fso.fsname)) | 820 b"0:,%d:%s,", len(fso.fsname), fso.fsname)) |
| 837 opath = join_output_path(top, fso.name) | 821 opath = join_output_path(top, fso.name) |
| 838 if self._utf8_mode: | 822 if self._utf8_mode: |
| 839 opath = walk.WalkDirEntry.alt_u8(opath) | 823 opath = walk.WalkDirEntry.alt_u8(opath) |
| 840 else: | 824 else: |
| 841 opath = walk.WalkDirEntry.alt_fs(opath) | 825 opath = walk.WalkDirEntry.alt_fs(opath) |
| 888 dgst = digest.compute_digest_file( | 872 dgst = digest.compute_digest_file( |
| 889 self._algorithm[0], | 873 self._algorithm[0], |
| 890 fso.path, | 874 fso.path, |
| 891 use_mmap=self._use_mmap) | 875 use_mmap=self._use_mmap) |
| 892 dir_dgst.update(util.interpolate_bytes( | 876 dir_dgst.update(util.interpolate_bytes( |
| 893 b"%d:%s,", len(dgst), dgst)) | 877 b"%d:%s,%d:%s,", |
| 878 len(self._algorithm[1]), | |
| 879 util.b(self._algorithm[1]), | |
| 880 len(dgst), | |
| 881 dgst)) | |
| 894 if self._size_only: | 882 if self._size_only: |
| 895 self._writer.write_size(opath, fso.stat.st_size) | 883 self._writer.write_size(opath, fso.stat.st_size) |
| 896 else: | 884 else: |
| 897 sz = fso.stat.st_size if self._print_size else None | 885 sz = fso.stat.st_size if self._print_size else None |
| 898 self._writer.write_file_digest( | 886 self._writer.write_file_digest( |
