comparison cutils/treesum.py @ 261:a3e25957afb7

treesum: instead of using format_bsd_line use a real write object with specialized methods
author Franz Glasner <fzglas.hg@dom66.de>
date Sun, 16 Feb 2025 14:17:10 +0100
parents 07a0bc723139
children c3d6599c1b5e
comparison
equal deleted inserted replaced
260:07a0bc723139 261:a3e25957afb7
436 else: 436 else:
437 out_cm = open(opts.output, "wb") 437 out_cm = open(opts.output, "wb")
438 out_cm = CRC32Output(out_cm) 438 out_cm = CRC32Output(out_cm)
439 439
440 with out_cm as outfp: 440 with out_cm as outfp:
441 writer = TreesumWriter(outfp)
441 for d in opts.directories: 442 for d in opts.directories:
442 V1DirectoryTreesumGenerator( 443 V1DirectoryTreesumGenerator(
443 opts.algorithm, opts.mmap, opts.base64, 444 opts.algorithm, opts.mmap, opts.base64,
444 opts.follow_symlinks, 445 opts.follow_symlinks,
445 opts.generator, 446 opts.generator,
448 opts.metadata_mtime, 449 opts.metadata_mtime,
449 opts.size_only, 450 opts.size_only,
450 opts.print_size, 451 opts.print_size,
451 opts.utf8, 452 opts.utf8,
452 minimal=opts.minimal).generate( 453 minimal=opts.minimal).generate(
453 outfp, d, comment=opts.comment) 454 writer, d, comment=opts.comment)
454 455
455 456
456 class V1DirectoryTreesumGenerator(object): 457 class V1DirectoryTreesumGenerator(object):
457 458
458 def __init__(self, algorithm, use_mmap, use_base64, 459 def __init__(self, algorithm, use_mmap, use_base64,
473 self._size_only = size_only 474 self._size_only = size_only
474 self._print_size = print_size 475 self._print_size = print_size
475 self._utf8_mode = utf8_mode 476 self._utf8_mode = utf8_mode
476 self._minimal = minimal 477 self._minimal = minimal
477 478
478 def generate(self, outfp, root, comment=None): 479 def generate(self, writer, root, comment=None):
479 """ 480 """
480 481
481 :param outfp: a *binary* file with a "write()" and a "flush()" method 482 :param outfp: a *binary* file with a "write()" and a "flush()" method
482 483
483 """ 484 """
484 self._outfp = outfp 485 self._writer = writer
485 self._outfp.resetdigest() 486 self._writer.start("1")
486 self._outfp.write(format_bsd_line("VERSION", "1", None, False)) 487 self._writer.write_fsencoding(util.n(walk.getfsencoding().upper()))
487 self._outfp.write(format_bsd_line( 488 self._writer.flush()
488 "FSENCODING", util.n(walk.getfsencoding().upper()), None, False))
489 self._outfp.flush()
490 489
491 if self._with_generator == "none": 490 if self._with_generator == "none":
492 pass # do nothing 491 pass # do nothing
493 elif self._with_generator == "normal": 492 elif self._with_generator == "normal":
494 self._outfp.write(format_bsd_line( 493 self._writer.write_generator("PY2" if util.PY2 else "PY3")
495 "GENERATOR", None, b"PY2" if util.PY2 else b"PY3", False))
496 elif self._with_generator == "full": 494 elif self._with_generator == "full":
497 import platform 495 import platform
498 info = "%s %s, %s" % (platform.python_implementation(), 496 info = "%s %s, %s" % (platform.python_implementation(),
499 platform.python_version(), 497 platform.python_version(),
500 platform.platform()) 498 platform.platform())
501 self._outfp.write(format_bsd_line( 499 self._writer.write_generator(info)
502 "GENERATOR", None, info.encode("utf-8"), False))
503 else: 500 else:
504 raise NotImplementedError( 501 raise NotImplementedError(
505 "not implemented: %s" % (self._with_generator,)) 502 "not implemented: %s" % (self._with_generator,))
506 503
507 # 504 #
527 if self._size_only: 524 if self._size_only:
528 flags.append("size-only") 525 flags.append("size-only")
529 flags.append("utf8-encoding" if self._utf8_mode else "fs-encoding") 526 flags.append("utf8-encoding" if self._utf8_mode else "fs-encoding")
530 if self._print_size: 527 if self._print_size:
531 flags.append("print-size") 528 flags.append("print-size")
532 flags.sort() 529 self._writer.write_flags(flags)
533 self._outfp.write(
534 format_bsd_line("FLAGS", ",".join(flags), None, False))
535 530
536 if self._minimal is None: 531 if self._minimal is None:
537 # Write execution timestamps in POSIX epoch and ISO format 532 # Write execution timestamps in POSIX epoch and ISO format
538 ts = int(time.time()) 533 ts = int(time.time())
539 self._outfp.write(format_bsd_line("TIMESTAMP", ts, None, False)) 534 self._writer.write_timestamp(ts)
540 ts = (datetime.datetime.utcfromtimestamp(ts)).isoformat("T") 535 ts = (datetime.datetime.utcfromtimestamp(ts)).isoformat("T")
541 self._outfp.write(format_bsd_line("ISOTIMESTAMP", ts, None, False)) 536 self._writer.write_isotimestamp(ts)
542 537
543 if comment: 538 if comment:
544 for line in comment: 539 for line in comment:
545 self._outfp.write( 540 self._writer.write_comment(line)
546 format_bsd_line("COMMENT", None, line, False))
547 541
548 if self._minimal is not None: 542 if self._minimal is not None:
549 self._outfp.write(format_bsd_line( 543 self._writer.write_root(
550 "ROOT",
551 None,
552 (walk.WalkDirEntry.alt_u8(self._minimal) 544 (walk.WalkDirEntry.alt_u8(self._minimal)
553 if self._minimal else b""), 545 if self._minimal else b""))
554 False))
555 else: 546 else:
556 self._outfp.write(format_bsd_line( 547 self._writer.write_root(walk.WalkDirEntry.alt_u8(root))
557 "ROOT", None, walk.WalkDirEntry.alt_u8(root), False)) 548 self._writer.flush()
558 self._outfp.flush()
559 549
560 if not self._follow_symlinks.command_line and os.path.islink(root): 550 if not self._follow_symlinks.command_line and os.path.islink(root):
561 linktgt = walk.WalkDirEntry.from_readlink(os.readlink(root)) 551 linktgt = walk.WalkDirEntry.from_readlink(os.readlink(root))
562 linkdgst = self._algorithm[0]() 552 linkdgst = self._algorithm[0]()
563 linkdgst.update( 553 linkdgst.update(
567 dir_dgst.update(b"1:L,") 557 dir_dgst.update(b"1:L,")
568 dir_dgst.update( 558 dir_dgst.update(
569 util.interpolate_bytes( 559 util.interpolate_bytes(
570 b"%d:%s,", len(linkdgst.digest()), linkdgst.digest())) 560 b"%d:%s,", len(linkdgst.digest()), linkdgst.digest()))
571 if self._size_only: 561 if self._size_only:
572 self._outfp.write( 562 self._writer.write_size(b"./@/", 0)
573 format_bsd_line(
574 "SIZE",
575 None,
576 "./@/",
577 False,
578 0))
579 else: 563 else:
580 self._outfp.write( 564 self._writer.write_file_digest(
581 format_bsd_line( 565 self._algorithm[1],
582 self._algorithm[1], 566 b"./@/",
583 dir_dgst.digest(), 567 dir_dgst.digest(),
584 "./@/", 568 self._use_base64)
585 self._use_base64)) 569 self._writer.flush()
586 self._outfp.flush() 570 self._writer.finish()
587 self._outfp.write(format_bsd_line(
588 "CRC32", self._outfp.hexcrcdigest(), None, False))
589 return 571 return
590 572
591 self._generate(os.path.normpath(root), tuple()) 573 self._generate(os.path.normpath(root), tuple())
592 self._outfp.write(format_bsd_line( 574 self._writer.finish()
593 "CRC32", self._outfp.hexcrcdigest(), None, False))
594 575
595 def _generate(self, root, top): 576 def _generate(self, root, top):
596 logging.debug("Handling %s/%r", root, top) 577 logging.debug("Handling %s/%r", root, top)
597 path = os.path.join(root, *top) if top else root 578 path = os.path.join(root, *top) if top else root
598 with walk.ScanDir(path) as dirscan: 579 with walk.ScanDir(path) as dirscan:
664 if self._utf8_mode: 645 if self._utf8_mode:
665 opath = walk.WalkDirEntry.alt_u8(opath) 646 opath = walk.WalkDirEntry.alt_u8(opath)
666 else: 647 else:
667 opath = walk.WalkDirEntry.alt_fs(opath) 648 opath = walk.WalkDirEntry.alt_fs(opath)
668 if self._size_only: 649 if self._size_only:
669 self._outfp.write(format_bsd_line( 650 self._writer.write_size(
670 "SIZE", None,
671 util.interpolate_bytes(b"%s/./@/", opath), 651 util.interpolate_bytes(b"%s/./@/", opath),
672 False, 0)) 652 0)
673 else: 653 else:
674 self._outfp.write(format_bsd_line( 654 self._writer.write_file_digest(
675 self._algorithm[1], 655 self._algorithm[1],
656 util.interpolate_bytes(b"%s/./@/", opath),
676 linkdgst.digest(), 657 linkdgst.digest(),
677 util.interpolate_bytes(b"%s/./@/", opath), 658 self._use_base64)
678 self._use_base64)) 659 self._writer.flush()
679 self._outfp.flush()
680 else: 660 else:
681 # 661 #
682 # Follow the symlink to dir or handle a "real" directory 662 # Follow the symlink to dir or handle a "real" directory
683 # 663 #
684 664
777 if self._utf8_mode: 757 if self._utf8_mode:
778 opath = walk.WalkDirEntry.alt_u8(opath) 758 opath = walk.WalkDirEntry.alt_u8(opath)
779 else: 759 else:
780 opath = walk.WalkDirEntry.alt_fs(opath) 760 opath = walk.WalkDirEntry.alt_fs(opath)
781 if self._size_only: 761 if self._size_only:
782 self._outfp.write(format_bsd_line( 762 self._writer.write_size(
783 "SIZE", None,
784 util.interpolate_bytes(b"%s/./@", opath), 763 util.interpolate_bytes(b"%s/./@", opath),
785 False, 0)) 764 0)
786 else: 765 else:
787 self._outfp.write(format_bsd_line( 766 self._writer.write_file_digest(
788 self._algorithm[1], 767 self._algorithm[1],
768 util.interpolate_bytes(b"%s/./@", opath),
789 linkdgst.digest(), 769 linkdgst.digest(),
790 util.interpolate_bytes(b"%s/./@", opath), 770 self._use_base64)
791 self._use_base64)) 771 self._writer.flush()
792 self._outfp.flush()
793 else: 772 else:
794 # 773 #
795 # Follow the symlink to file or handle a "real" file 774 # Follow the symlink to file or handle a "real" file
796 # 775 #
797 776
842 if self._utf8_mode: 821 if self._utf8_mode:
843 opath = walk.WalkDirEntry.alt_u8(opath) 822 opath = walk.WalkDirEntry.alt_u8(opath)
844 else: 823 else:
845 opath = walk.WalkDirEntry.alt_fs(opath) 824 opath = walk.WalkDirEntry.alt_fs(opath)
846 if self._size_only: 825 if self._size_only:
847 self._outfp.write(format_bsd_line( 826 self._writer.write_size(opath, fso.stat.st_size)
848 "SIZE", None, opath, False, fso.stat.st_size))
849 else: 827 else:
850 if self._print_size: 828 sz = fso.stat.st_size if self._print_size else None
851 self._outfp.write(format_bsd_line( 829 self._writer.write_file_digest(
852 self._algorithm[1], 830 self._algorithm[1], opath, dgst,
853 dgst, opath, 831 use_base64=self._use_base64,
854 self._use_base64, 832 size=sz)
855 fso.stat.st_size)) 833 self._writer.flush()
856 else:
857 self._outfp.write(format_bsd_line(
858 self._algorithm[1], dgst, opath,
859 self._use_base64))
860 self._outfp.flush()
861 opath = join_output_path(top, None) 834 opath = join_output_path(top, None)
862 if opath: 835 if opath:
863 if self._utf8_mode: 836 if self._utf8_mode:
864 opath = walk.WalkDirEntry.alt_u8(opath) 837 opath = walk.WalkDirEntry.alt_u8(opath)
865 else: 838 else:
866 opath = walk.WalkDirEntry.alt_fs(opath) 839 opath = walk.WalkDirEntry.alt_fs(opath)
867 if self._size_only: 840 if self._size_only:
868 self._outfp.write(format_bsd_line( 841 self._writer.write_size(opath, dir_size)
869 "SIZE", None, opath, False, dir_size))
870 else: 842 else:
871 if dir_tainted: 843 if dir_tainted:
872 # 844 #
873 # IMPORTANT: Print errors BEFORE the associated digest line. 845 # IMPORTANT: Print errors BEFORE the associated digest line.
874 # Otherwise the "info" command has a problem. 846 # Otherwise the "info" command has a problem.
875 # 847 #
876 self._outfp.write(format_bsd_line( 848 self._writer.write_error(b"directory is tainted")
877 b"ERROR", None, b"directory is tainted", False, None))
878 logging.error("Directory has filename problems: %r", opath) 849 logging.error("Directory has filename problems: %r", opath)
879 if self._print_size: 850 sz = dir_size if self._print_size else None
880 self._outfp.write(format_bsd_line( 851 self._writer.write_file_digest(
881 self._algorithm[1], dir_dgst.digest(), opath, 852 self._algorithm[1], opath, dir_dgst.digest(),
882 self._use_base64, dir_size)) 853 use_base64=self._use_base64, size=sz)
883 else: 854 self._writer.flush()
884 self._outfp.write(format_bsd_line(
885 self._algorithm[1], dir_dgst.digest(), opath,
886 self._use_base64))
887 self._outfp.flush()
888 return (dir_dgst.digest(), dir_size) 855 return (dir_dgst.digest(), dir_size)
889 856
890 857
891 def join_output_path(top, name): 858 def join_output_path(top, name):
892 if name is None: 859 if name is None:
969 if not modestr.startswith("0"): 936 if not modestr.startswith("0"):
970 modestr = "0" + modestr 937 modestr = "0" + modestr
971 return modestr 938 return modestr
972 939
973 940
class TreesumWriter(object):

    """Writer to write treesum digest files in a format similar to BSD
    digest files.

    Wraps an output file pointer for a binary file.

    Provides high-level methods to write data lines.

    Also holds the current CRC for a block: every non-empty chunk that
    goes through :meth:`write` is fed into it, :meth:`start` resets it
    and :meth:`finish` emits it as a ``CRC32`` line.

    """

    # Line terminator appended by `writeln()`: the platform line separator
    # converted with `util.b`
    LS = util.b(os.linesep)

    def __init__(self, outfp):
        """
        :param outfp: the wrapped output file; only its "write()" and
                      "flush()" methods are used and it is always given
                      bytes

        """
        self._outfp = outfp
        self._reset_crc()

    def _reset_crc(self):
        """Replace the current CRC state with a fresh `crc32` object."""
        self._crc = crc32()

    def _write_parenthesized(self, tag, payload):
        """Write a line of the form ``TAG (payload)``.

        :param bytes tag: the tag name
        :param bytes payload: the data to put between the parentheses

        """
        self.write(tag + b" (")
        self.write(payload)
        self.writeln(b")")

    def _write_assignment(self, tag, value):
        """Write a line of the form ``TAG = value``.

        :param bytes tag: the tag name
        :param bytes value: the data to put after the equal sign

        """
        self.write(tag + b" = ")
        self.writeln(value)

    def start(self, version):
        """Begin a new block, reset the current CRC and write the VERSION
        tag.

        :param version: the version; converted with `util.b`

        """
        # Reset first: the VERSION line is part of the new block's CRC
        self._reset_crc()
        self._write_assignment(b"VERSION", util.b(version))

    def write_comment(self, comment):
        """Write a ``COMMENT (...)`` line.

        :param comment: the comment text; converted with
                        ``util.b(comment, "utf-8")``

        """
        self._write_parenthesized(b"COMMENT", util.b(comment, "utf-8"))

    def write_generator(self, generator):
        """Write a ``GENERATOR (...)`` line.

        :param generator: the generator description; converted with
                          ``util.b(generator, "utf-8")``

        """
        self._write_parenthesized(b"GENERATOR", util.b(generator, "utf-8"))

    def write_error(self, error):
        """Write an ``ERROR (...)`` line.

        :param error: the error text; converted with
                      ``util.b(error, "utf-8")``

        """
        self._write_parenthesized(b"ERROR", util.b(error, "utf-8"))

    def write_fsencoding(self, encoding):
        """Write a ``FSENCODING = ...`` line.

        :param encoding: the encoding name; converted with `util.b`

        """
        self._write_assignment(b"FSENCODING", util.b(encoding))

    def write_flags(self, flags):
        """Write a ``FLAGS = ...`` line.

        :param flags: either a ready-made flags string (`str`, `bytes` or
                      `bytearray`), which is written as given, or an
                      iterable of flag names, which are written sorted and
                      joined by commas

        The given iterable is not modified.

        """
        if isinstance(flags, (str, bytes, bytearray)):
            value = util.b(flags)
        else:
            # sorted() instead of flags.sort(): works for every iterable
            # (tuple, set, generator) and leaves the caller's object alone
            value = util.b(",".join(sorted(flags)))
        self._write_assignment(b"FLAGS", value)

    def write_timestamp(self, ts):
        """Write a ``TIMESTAMP = ...`` line.

        :param ts: the timestamp; written as ``str(ts)``

        """
        self._write_assignment(b"TIMESTAMP", util.b(str(ts)))

    def write_isotimestamp(self, ts):
        """Write an ``ISOTIMESTAMP = ...`` line.

        :param ts: the already formatted timestamp; converted with `util.b`

        """
        self._write_assignment(b"ISOTIMESTAMP", util.b(ts))

    def write_root(self, root):
        """Write a ``ROOT (...)`` line.

        :param bytes root: the root, written as given

        """
        assert isinstance(root, bytes)
        self._write_parenthesized(b"ROOT", root)

    def write_size(self, filename, sz):
        """Write a ``SIZE (filename) = sz`` line.

        :param bytes filename: the name to put into the parentheses,
                               written as given
        :param sz: the size; written as ``str(sz)``

        """
        assert isinstance(filename, bytes)
        self.write(b"SIZE (")
        self.write(filename)
        self.write(b") = ")
        self.writeln(util.b(str(sz)))

    def write_file_digest(self, algorithm, filename, digest,
                          use_base64=False, size=None):
        """Write a digest line ``ALGORITHM (filename) = digest[,size]``.

        :param algorithm: the algorithm tag; converted with `util.b`
        :param bytes filename: the name to put into the parentheses
        :param bytes digest: the raw digest
        :param bool use_base64: write the digest Base64 encoded instead of
                                as hex string
        :param size: if not `None` it is appended to the digest, separated
                     by a comma

        """
        digest = (base64.b64encode(digest)
                  if use_base64
                  else binascii.hexlify(digest))
        # The name b"./@/" is written verbatim; every other name goes
        # through util.normalize_filename() first
        if filename != b"./@/":
            filename = util.normalize_filename(filename, True)
        self.write(util.b(algorithm))
        self.write(b" (")
        self.write(filename)
        self.write(b") = ")
        self.write(digest)
        if size is None:
            # Just terminate the line (empty data is skipped by `write()`)
            self.writeln(b"")
        else:
            self.write(b",")
            self.writeln(util.b(str(size)))

    def finish(self):
        """Finish a block and write the current CRC"""
        # Take the digest BEFORE writing anything of the CRC32 line:
        # `write()` keeps updating the CRC and the CRC32 line must not
        # be part of the value it reports
        crc = self._crc.hexdigest()
        self._write_assignment(b"CRC32", util.b(crc))

    def writeln(self, line):
        """Write the bytes `line` into the output file and update the CRC
        accordingly.

        :param bytes line: The line to write to (without line ending)

        """
        self.write(line)
        self.write(self.LS)

    def write(self, data):
        """Write `data` into the output file and update the CRC accordingly.

        Empty `data` is ignored: nothing is written and the CRC is left
        untouched.

        :param bytes data: The data to write to and to update the CRC with

        """
        if data:
            self._outfp.write(data)
            self._crc.update(data)

    def flush(self):
        """Flush the wrapped output file."""
        self._outfp.flush()
1007 1066
1008 1067
1009 class TreesumReader(object): 1068 class TreesumReader(object):
1010 1069
1011 """Reader to read and/or verify treesum digest files. 1070 """Reader to read and/or verify treesum digest files.