comparison cutils/treesum.py @ 217:8e38c07c4b85

Handle symlinks to files fully and implement no-follow-file-symlinks
author Franz Glasner <fzglas.hg@dom66.de>
date Sat, 25 Jan 2025 09:52:22 +0100
parents 5a2d9ec204ce
children dee891ed2307
comparison
equal deleted inserted replaced
216:454e240e631b 217:8e38c07c4b85
67 "--follow-directory-symlinks", "-l", action=SymlinkAction, 67 "--follow-directory-symlinks", "-l", action=SymlinkAction,
68 const="follow-directory-symlinks", 68 const="follow-directory-symlinks",
69 default=FollowSymlinkConfig(False, False, True), 69 default=FollowSymlinkConfig(False, False, True),
70 dest="follow_symlinks", 70 dest="follow_symlinks",
71 help="""Follow symbolic links to directories when walking a 71 help="""Follow symbolic links to directories when walking a
72 directory tree. Augments --physical.""") 72 directory tree. Augments --physical and -p.""")
73 gp.add_argument( 73 gp.add_argument(
74 "--follow-file-symlinks", action=SymlinkAction, 74 "--follow-file-symlinks", action=SymlinkAction,
75 const="follow-file-symlinks", 75 const="follow-file-symlinks",
76 default=FollowSymlinkConfig(False, False, True), 76 default=FollowSymlinkConfig(False, False, True),
77 dest="follow_symlinks", 77 dest="follow_symlinks",
94 "--logical", "-L", action=SymlinkAction, dest="follow_symlinks", 94 "--logical", "-L", action=SymlinkAction, dest="follow_symlinks",
95 const=FollowSymlinkConfig(True, True, True), 95 const=FollowSymlinkConfig(True, True, True),
96 help="""Follow symbolic links everywhere: on command line 96 help="""Follow symbolic links everywhere: on command line
97 arguments and -- while walking -- directory and file symbolic links. 97 arguments and -- while walking -- directory and file symbolic links.
98 Overwrites any other symlink related options 98 Overwrites any other symlink related options
99 (--physical, no-follow-directory-symlinks, no-follow-file-symlinks, et al.). 99 (--physical,-p, no-follow-directory-symlinks, no-follow-file-symlinks,
100 et al.).
100 """) 101 """)
101 gp.add_argument( 102 gp.add_argument(
102 "--minimal", nargs="?", const="", default=None, metavar="TAG", 103 "--minimal", nargs="?", const="", default=None, metavar="TAG",
103 help="Produce minimal output only. If a TAG is given and not " 104 help="Produce minimal output only. If a TAG is given and not "
104 "empty use it as the leading \"ROOT (<TAG>)\" output.") 105 "empty use it as the leading \"ROOT (<TAG>)\" output.")
126 gp.add_argument( 127 gp.add_argument(
127 "--no-follow-file-symlinks", action=SymlinkAction, 128 "--no-follow-file-symlinks", action=SymlinkAction,
128 const="no-follow-file-symlinks", 129 const="no-follow-file-symlinks",
129 dest="follow_symlinks", 130 dest="follow_symlinks",
130 help="""Dont follow symbolic links to files when walking a 131 help="""Dont follow symbolic links to files when walking a
131 directory tree. Augments --logical.""") 132 directory tree. Augments --logical and -p.""")
132 gp.add_argument( 133 gp.add_argument(
133 "--no-mmap", action="store_false", dest="mmap", default=None, 134 "--no-mmap", action="store_false", dest="mmap", default=None,
134 help="Dont use mmap.") 135 help="Dont use mmap.")
135 gp.add_argument( 136 gp.add_argument(
136 "--output", "-o", action="store", metavar="OUTPUT", 137 "--output", "-o", action="store", metavar="OUTPUT",
140 "--physical", "-P", action=SymlinkAction, dest="follow_symlinks", 141 "--physical", "-P", action=SymlinkAction, dest="follow_symlinks",
141 const=FollowSymlinkConfig(False, False, False), 142 const=FollowSymlinkConfig(False, False, False),
142 help="""Do not follow any symbolic links whether they are given 143 help="""Do not follow any symbolic links whether they are given
143 on the command line or when walking the directory tree. 144 on the command line or when walking the directory tree.
144 Overwrites any other symlink related options 145 Overwrites any other symlink related options
145 (--logical, follow-directory-symlinks, follow-file-symlinks, et al.). 146 (--logical, -p, follow-directory-symlinks, follow-file-symlinks, et al.).
147 This is the default.""")
148 gp.add_argument(
149 "-p", action=SymlinkAction, dest="follow_symlinks",
150 const=FollowSymlinkConfig(False, False, True),
151 help="""Do not follow any symbolic links to directories,
152 whether they are given on the command line or when walking the directory tree,
153 but follow symbolic links to files.
154 Overwrites any other symlink related options
155 (--logical, --physical, follow-directory-symlinks, no-follow-file-symlinks,
156 et al.).
146 This is the default.""") 157 This is the default.""")
147 gp.add_argument( 158 gp.add_argument(
148 "--print-size", action="store_true", 159 "--print-size", action="store_true",
149 help="""Print the size of a file or the accumulated sizes of 160 help="""Print the size of a file or the accumulated sizes of
150 directory content into the output also. 161 directory content into the output also.
334 curval.command_line, curval.directory, False) 345 curval.command_line, curval.directory, False)
335 else: 346 else:
336 assert False, "Implementation error: not yet implemented" 347 assert False, "Implementation error: not yet implemented"
337 348
338 # Not following symlinks to files is not yet supported: reset to True 349 # Not following symlinks to files is not yet supported: reset to True
339 if not curval.file: 350 # if not curval.file:
340 curval = FollowSymlinkConfig( 351 # curval = FollowSymlinkConfig(
341 curval.command_line, curval.directory, True) 352 # curval.command_line, curval.directory, True)
342 logging.warning("Coercing options to `follow-file-symlinks'") 353 # logging.warning("Coercing options to `follow-file-symlinks'")
343 setattr(namespace, self.dest, curval) 354 setattr(namespace, self.dest, curval)
344 355
345 356
346 def gen_generate_opts(directories=[], 357 def gen_generate_opts(directories=[],
347 algorithm=util.default_algotag(), 358 algorithm=util.default_algotag(),
358 mtime=False, 369 mtime=False,
359 output=None, 370 output=None,
360 print_size=False, 371 print_size=False,
361 size_only=False, 372 size_only=False,
362 utf8=False): 373 utf8=False):
363 # Not following symlinks to files is not yet supported: reset to True
364 if not isinstance(follow_symlinks, FollowSymlinkConfig): 374 if not isinstance(follow_symlinks, FollowSymlinkConfig):
365 raise TypeError("`follow_symlinks' must be a FollowSymlinkConfig") 375 raise TypeError("`follow_symlinks' must be a FollowSymlinkConfig")
366 if not follow_symlinks.file: 376 # Not following symlinks to files is not yet supported: reset to True
367 follow_symlinks = follow_symlinks._make([follow_symlinks.command_line, 377 # if not follow_symlinks.file:
368 follow_symlinks.directory, 378 # follow_symlinks = follow_symlinks._make([follow_symlinks.command_line,
369 True]) 379 # follow_symlinks.directory,
370 logging.warning("Coercing to follow-symlinks-file") 380 # True])
381 # logging.warning("Coercing to follow-symlinks-file")
371 opts = argparse.Namespace( 382 opts = argparse.Namespace(
372 directories=directories, 383 directories=directories,
373 algorithm=util.argv2algo(algorithm), 384 algorithm=util.argv2algo(algorithm),
374 append_output=append_output, 385 append_output=append_output,
375 base64=base64, 386 base64=base64,
579 self._generate(os.path.normpath(root), tuple()) 590 self._generate(os.path.normpath(root), tuple())
580 self._outfp.write(format_bsd_line( 591 self._outfp.write(format_bsd_line(
581 "CRC32", self._outfp.hexcrcdigest(), None, False)) 592 "CRC32", self._outfp.hexcrcdigest(), None, False))
582 593
583 def _generate(self, root, top): 594 def _generate(self, root, top):
584 # This is currently always True
585 assert self._follow_symlinks.file
586
587 logging.debug("Handling %s/%r", root, top) 595 logging.debug("Handling %s/%r", root, top)
588 path = os.path.join(root, *top) if top else root 596 path = os.path.join(root, *top) if top else root
589 with walk.ScanDir(path) as dirscan: 597 with walk.ScanDir(path) as dirscan:
590 fsobjects = list(dirscan) 598 fsobjects = list(dirscan)
591 if self._utf8_mode: 599 if self._utf8_mode:
706 modestr = util.b(normalized_compatible_mode_str( 714 modestr = util.b(normalized_compatible_mode_str(
707 fso.stat.st_mode)) 715 fso.stat.st_mode))
708 dir_dgst.update(util.interpolate_bytes( 716 dir_dgst.update(util.interpolate_bytes(
709 b"4:mode,%d:%s,", len(modestr), modestr)) 717 b"4:mode,%d:%s,", len(modestr), modestr))
710 else: 718 else:
711 if self._utf8_mode: 719 if fso.is_symlink and not self._follow_symlinks.file:
712 if fso.u8name is None: 720 linktgt = walk.WalkDirEntry.from_readlink(
713 dir_tainted = True 721 os.readlink(fso.path))
714 dir_dgst.update(util.interpolate_bytes( 722 # linktgt = util.fsencode(os.readlink(fso.path)))
715 b"1:f,%d:%s,", 723 linkdgst = self._algorithm[0]()
716 len(fso.alt_u8name), 724 if self._utf8_mode:
717 fso.alt_u8name)) 725 if linktgt.u8path is None:
726 dir_tainted = True
727 linkdgst.update(util.interpolate_bytes(
728 b"%d:%s,",
729 len(linktgt.alt_u8path),
730 linktgt.alt_u8path))
731 else:
732 linkdgst.update(util.interpolate_bytes(
733 b"%d:%s,",
734 len(linktgt.u8path),
735 linktgt.u8path))
736 if fso.u8name is None:
737 dir_tainted = True
738 dir_dgst.update(util.interpolate_bytes(
739 b"1:F,%d:%s,",
740 len(fso.alt_u8name),
741 fso.alt_u8name))
742 else:
743 dir_dgst.update(util.interpolate_bytes(
744 b"1:F,%d:%s,", len(fso.u8name), fso.u8name))
718 else: 745 else:
719 dir_dgst.update(util.interpolate_bytes( 746 if linktgt.fspath is None:
720 b"1:f,%d:%s,", len(fso.u8name), fso.u8name)) 747 dir_tainted = True
721 else: 748 linkdgst.update(util.interpolate_bytes(
722 if fso.fsname is None: 749 b"%d:%s,",
723 dir_tainted = True 750 len(linktgt.alt_fspath),
724 dir_dgst.update(util.interpolate_bytes( 751 linktgt.alt_fspath))
725 b"1:f,%d:%s,", 752 else:
726 len(fso.alt_fsname), 753 linkdgst.update(util.interpolate_bytes(
727 fso.alt_fsname)) 754 b"%d:%s,",
755 len(linktgt.fspath),
756 linktgt.fspath))
757 if fso.fsname is None:
758 dir_tainted = True
759 dir_dgst.update(util.interpolate_bytes(
760 b"1:F,%d:%s,",
761 len(fso.alt_fsname),
762 fso.alt_fsname))
763 else:
764 dir_dgst.update(util.interpolate_bytes(
765 b"1:F,%d:%s,", len(fso.fsname), fso.fsname))
766 #
767 # - no mtime and no mode for symlinks
768 # - also does not count for dir_size
769 #
770 dir_dgst.update(util.interpolate_bytes(
771 b"%d:%s,",
772 len(linkdgst.digest()), linkdgst.digest()))
773 opath = join_output_path(top, fso.name)
774 if self._utf8_mode:
775 opath = walk.WalkDirEntry.alt_u8(opath)
728 else: 776 else:
729 dir_dgst.update(util.interpolate_bytes( 777 opath = walk.WalkDirEntry.alt_fs(opath)
730 b"1:f,%d:%s,", len(fso.fsname), fso.fsname)) 778 if self._size_only:
731 dir_size += fso.stat.st_size
732 if self._with_metadata_mtime:
733 mtime = datetime.datetime.utcfromtimestamp(
734 int(fso.stat.st_mtime))
735 mtime = util.b(mtime.isoformat("T") + "Z")
736 dir_dgst.update(util.interpolate_bytes(
737 b"5:mtime,%d:%s,", len(mtime), mtime))
738 if self._with_metadata_full_mode:
739 modestr = util.b(normalized_mode_str(fso.stat.st_mode))
740 dir_dgst.update(util.interpolate_bytes(
741 b"8:fullmode,%d:%s,", len(modestr), modestr))
742 elif self._with_metadata_mode:
743 modestr = util.b(normalized_compatible_mode_str(
744 fso.stat.st_mode))
745 dir_dgst.update(util.interpolate_bytes(
746 b"4:mode,%d:%s,", len(modestr), modestr))
747 if not self._size_only:
748 dgst = digest.compute_digest_file(
749 self._algorithm[0], fso.path, use_mmap=self._use_mmap)
750 dir_dgst.update(util.interpolate_bytes(
751 b"%d:%s,", len(dgst), dgst))
752 opath = join_output_path(top, fso.name)
753 if self._utf8_mode:
754 opath = walk.WalkDirEntry.alt_u8(opath)
755 else:
756 opath = walk.WalkDirEntry.alt_fs(opath)
757 if self._size_only:
758 self._outfp.write(format_bsd_line(
759 "SIZE", None, opath, False, fso.stat.st_size))
760 else:
761 if self._print_size:
762 self._outfp.write(format_bsd_line( 779 self._outfp.write(format_bsd_line(
763 self._algorithm[1], dgst, opath, self._use_base64, 780 "SIZE", None, "%s/./@" % (opath,), False, 0))
764 fso.stat.st_size))
765 else: 781 else:
766 self._outfp.write(format_bsd_line( 782 self._outfp.write(format_bsd_line(
767 self._algorithm[1], dgst, opath, 783 self._algorithm[1],
784 linkdgst.digest(),
785 "%s/./@" % (opath,),
768 self._use_base64)) 786 self._use_base64))
787 self._outfp.flush()
788 else:
789 # follow symlinks to files
790 if self._utf8_mode:
791 if fso.u8name is None:
792 dir_tainted = True
793 dir_dgst.update(util.interpolate_bytes(
794 b"1:f,%d:%s,",
795 len(fso.alt_u8name),
796 fso.alt_u8name))
797 else:
798 dir_dgst.update(util.interpolate_bytes(
799 b"1:f,%d:%s,", len(fso.u8name), fso.u8name))
800 else:
801 if fso.fsname is None:
802 dir_tainted = True
803 dir_dgst.update(util.interpolate_bytes(
804 b"1:f,%d:%s,",
805 len(fso.alt_fsname),
806 fso.alt_fsname))
807 else:
808 dir_dgst.update(util.interpolate_bytes(
809 b"1:f,%d:%s,", len(fso.fsname), fso.fsname))
810 dir_size += fso.stat.st_size
811 if self._with_metadata_mtime:
812 mtime = datetime.datetime.utcfromtimestamp(
813 int(fso.stat.st_mtime))
814 mtime = util.b(mtime.isoformat("T") + "Z")
815 dir_dgst.update(util.interpolate_bytes(
816 b"5:mtime,%d:%s,", len(mtime), mtime))
817 if self._with_metadata_full_mode:
818 modestr = util.b(normalized_mode_str(fso.stat.st_mode))
819 dir_dgst.update(util.interpolate_bytes(
820 b"8:fullmode,%d:%s,", len(modestr), modestr))
821 elif self._with_metadata_mode:
822 modestr = util.b(normalized_compatible_mode_str(
823 fso.stat.st_mode))
824 dir_dgst.update(util.interpolate_bytes(
825 b"4:mode,%d:%s,", len(modestr), modestr))
826 if not self._size_only:
827 dgst = digest.compute_digest_file(
828 self._algorithm[0],
829 fso.path,
830 use_mmap=self._use_mmap)
831 dir_dgst.update(util.interpolate_bytes(
832 b"%d:%s,", len(dgst), dgst))
833 opath = join_output_path(top, fso.name)
834 if self._utf8_mode:
835 opath = walk.WalkDirEntry.alt_u8(opath)
836 else:
837 opath = walk.WalkDirEntry.alt_fs(opath)
838 if self._size_only:
839 self._outfp.write(format_bsd_line(
840 "SIZE", None, opath, False, fso.stat.st_size))
841 else:
842 if self._print_size:
843 self._outfp.write(format_bsd_line(
844 self._algorithm[1],
845 dgst, opath,
846 self._use_base64,
847 fso.stat.st_size))
848 else:
849 self._outfp.write(format_bsd_line(
850 self._algorithm[1], dgst, opath,
851 self._use_base64))
769 self._outfp.flush() 852 self._outfp.flush()
770 opath = join_output_path(top, None) 853 opath = join_output_path(top, None)
771 if opath: 854 if opath:
772 if self._utf8_mode: 855 if self._utf8_mode:
773 opath = walk.WalkDirEntry.alt_u8(opath) 856 opath = walk.WalkDirEntry.alt_u8(opath)