Mercurial > hgrepos > Python > apps > py-cutils
comparison cutils/treesum.py @ 261:a3e25957afb7
treesum: instead of using format_bsd_line, use a real writer object with specialized methods
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Sun, 16 Feb 2025 14:17:10 +0100 |
| parents | 07a0bc723139 |
| children | c3d6599c1b5e |
comparison
equal
deleted
inserted
replaced
| 260:07a0bc723139 | 261:a3e25957afb7 |
|---|---|
| 436 else: | 436 else: |
| 437 out_cm = open(opts.output, "wb") | 437 out_cm = open(opts.output, "wb") |
| 438 out_cm = CRC32Output(out_cm) | 438 out_cm = CRC32Output(out_cm) |
| 439 | 439 |
| 440 with out_cm as outfp: | 440 with out_cm as outfp: |
| 441 writer = TreesumWriter(outfp) | |
| 441 for d in opts.directories: | 442 for d in opts.directories: |
| 442 V1DirectoryTreesumGenerator( | 443 V1DirectoryTreesumGenerator( |
| 443 opts.algorithm, opts.mmap, opts.base64, | 444 opts.algorithm, opts.mmap, opts.base64, |
| 444 opts.follow_symlinks, | 445 opts.follow_symlinks, |
| 445 opts.generator, | 446 opts.generator, |
| 448 opts.metadata_mtime, | 449 opts.metadata_mtime, |
| 449 opts.size_only, | 450 opts.size_only, |
| 450 opts.print_size, | 451 opts.print_size, |
| 451 opts.utf8, | 452 opts.utf8, |
| 452 minimal=opts.minimal).generate( | 453 minimal=opts.minimal).generate( |
| 453 outfp, d, comment=opts.comment) | 454 writer, d, comment=opts.comment) |
| 454 | 455 |
| 455 | 456 |
| 456 class V1DirectoryTreesumGenerator(object): | 457 class V1DirectoryTreesumGenerator(object): |
| 457 | 458 |
| 458 def __init__(self, algorithm, use_mmap, use_base64, | 459 def __init__(self, algorithm, use_mmap, use_base64, |
| 473 self._size_only = size_only | 474 self._size_only = size_only |
| 474 self._print_size = print_size | 475 self._print_size = print_size |
| 475 self._utf8_mode = utf8_mode | 476 self._utf8_mode = utf8_mode |
| 476 self._minimal = minimal | 477 self._minimal = minimal |
| 477 | 478 |
| 478 def generate(self, outfp, root, comment=None): | 479 def generate(self, writer, root, comment=None): |
| 479 """ | 480 """ |
| 480 | 481 |
| 481 :param outfp: a *binary* file with a "write()" and a "flush()" method | 482 :param outfp: a *binary* file with a "write()" and a "flush()" method |
| 482 | 483 |
| 483 """ | 484 """ |
| 484 self._outfp = outfp | 485 self._writer = writer |
| 485 self._outfp.resetdigest() | 486 self._writer.start("1") |
| 486 self._outfp.write(format_bsd_line("VERSION", "1", None, False)) | 487 self._writer.write_fsencoding(util.n(walk.getfsencoding().upper())) |
| 487 self._outfp.write(format_bsd_line( | 488 self._writer.flush() |
| 488 "FSENCODING", util.n(walk.getfsencoding().upper()), None, False)) | |
| 489 self._outfp.flush() | |
| 490 | 489 |
| 491 if self._with_generator == "none": | 490 if self._with_generator == "none": |
| 492 pass # do nothing | 491 pass # do nothing |
| 493 elif self._with_generator == "normal": | 492 elif self._with_generator == "normal": |
| 494 self._outfp.write(format_bsd_line( | 493 self._writer.write_generator("PY2" if util.PY2 else "PY3") |
| 495 "GENERATOR", None, b"PY2" if util.PY2 else b"PY3", False)) | |
| 496 elif self._with_generator == "full": | 494 elif self._with_generator == "full": |
| 497 import platform | 495 import platform |
| 498 info = "%s %s, %s" % (platform.python_implementation(), | 496 info = "%s %s, %s" % (platform.python_implementation(), |
| 499 platform.python_version(), | 497 platform.python_version(), |
| 500 platform.platform()) | 498 platform.platform()) |
| 501 self._outfp.write(format_bsd_line( | 499 self._writer.write_generator(info) |
| 502 "GENERATOR", None, info.encode("utf-8"), False)) | |
| 503 else: | 500 else: |
| 504 raise NotImplementedError( | 501 raise NotImplementedError( |
| 505 "not implemented: %s" % (self._with_generator,)) | 502 "not implemented: %s" % (self._with_generator,)) |
| 506 | 503 |
| 507 # | 504 # |
| 527 if self._size_only: | 524 if self._size_only: |
| 528 flags.append("size-only") | 525 flags.append("size-only") |
| 529 flags.append("utf8-encoding" if self._utf8_mode else "fs-encoding") | 526 flags.append("utf8-encoding" if self._utf8_mode else "fs-encoding") |
| 530 if self._print_size: | 527 if self._print_size: |
| 531 flags.append("print-size") | 528 flags.append("print-size") |
| 532 flags.sort() | 529 self._writer.write_flags(flags) |
| 533 self._outfp.write( | |
| 534 format_bsd_line("FLAGS", ",".join(flags), None, False)) | |
| 535 | 530 |
| 536 if self._minimal is None: | 531 if self._minimal is None: |
| 537 # Write execution timestamps in POSIX epoch and ISO format | 532 # Write execution timestamps in POSIX epoch and ISO format |
| 538 ts = int(time.time()) | 533 ts = int(time.time()) |
| 539 self._outfp.write(format_bsd_line("TIMESTAMP", ts, None, False)) | 534 self._writer.write_timestamp(ts) |
| 540 ts = (datetime.datetime.utcfromtimestamp(ts)).isoformat("T") | 535 ts = (datetime.datetime.utcfromtimestamp(ts)).isoformat("T") |
| 541 self._outfp.write(format_bsd_line("ISOTIMESTAMP", ts, None, False)) | 536 self._writer.write_isotimestamp(ts) |
| 542 | 537 |
| 543 if comment: | 538 if comment: |
| 544 for line in comment: | 539 for line in comment: |
| 545 self._outfp.write( | 540 self._writer.write_comment(line) |
| 546 format_bsd_line("COMMENT", None, line, False)) | |
| 547 | 541 |
| 548 if self._minimal is not None: | 542 if self._minimal is not None: |
| 549 self._outfp.write(format_bsd_line( | 543 self._writer.write_root( |
| 550 "ROOT", | |
| 551 None, | |
| 552 (walk.WalkDirEntry.alt_u8(self._minimal) | 544 (walk.WalkDirEntry.alt_u8(self._minimal) |
| 553 if self._minimal else b""), | 545 if self._minimal else b"")) |
| 554 False)) | |
| 555 else: | 546 else: |
| 556 self._outfp.write(format_bsd_line( | 547 self._writer.write_root(walk.WalkDirEntry.alt_u8(root)) |
| 557 "ROOT", None, walk.WalkDirEntry.alt_u8(root), False)) | 548 self._writer.flush() |
| 558 self._outfp.flush() | |
| 559 | 549 |
| 560 if not self._follow_symlinks.command_line and os.path.islink(root): | 550 if not self._follow_symlinks.command_line and os.path.islink(root): |
| 561 linktgt = walk.WalkDirEntry.from_readlink(os.readlink(root)) | 551 linktgt = walk.WalkDirEntry.from_readlink(os.readlink(root)) |
| 562 linkdgst = self._algorithm[0]() | 552 linkdgst = self._algorithm[0]() |
| 563 linkdgst.update( | 553 linkdgst.update( |
| 567 dir_dgst.update(b"1:L,") | 557 dir_dgst.update(b"1:L,") |
| 568 dir_dgst.update( | 558 dir_dgst.update( |
| 569 util.interpolate_bytes( | 559 util.interpolate_bytes( |
| 570 b"%d:%s,", len(linkdgst.digest()), linkdgst.digest())) | 560 b"%d:%s,", len(linkdgst.digest()), linkdgst.digest())) |
| 571 if self._size_only: | 561 if self._size_only: |
| 572 self._outfp.write( | 562 self._writer.write_size(b"./@/", 0) |
| 573 format_bsd_line( | |
| 574 "SIZE", | |
| 575 None, | |
| 576 "./@/", | |
| 577 False, | |
| 578 0)) | |
| 579 else: | 563 else: |
| 580 self._outfp.write( | 564 self._writer.write_file_digest( |
| 581 format_bsd_line( | 565 self._algorithm[1], |
| 582 self._algorithm[1], | 566 b"./@/", |
| 583 dir_dgst.digest(), | 567 dir_dgst.digest(), |
| 584 "./@/", | 568 self._use_base64) |
| 585 self._use_base64)) | 569 self._writer.flush() |
| 586 self._outfp.flush() | 570 self._writer.finish() |
| 587 self._outfp.write(format_bsd_line( | |
| 588 "CRC32", self._outfp.hexcrcdigest(), None, False)) | |
| 589 return | 571 return |
| 590 | 572 |
| 591 self._generate(os.path.normpath(root), tuple()) | 573 self._generate(os.path.normpath(root), tuple()) |
| 592 self._outfp.write(format_bsd_line( | 574 self._writer.finish() |
| 593 "CRC32", self._outfp.hexcrcdigest(), None, False)) | |
| 594 | 575 |
| 595 def _generate(self, root, top): | 576 def _generate(self, root, top): |
| 596 logging.debug("Handling %s/%r", root, top) | 577 logging.debug("Handling %s/%r", root, top) |
| 597 path = os.path.join(root, *top) if top else root | 578 path = os.path.join(root, *top) if top else root |
| 598 with walk.ScanDir(path) as dirscan: | 579 with walk.ScanDir(path) as dirscan: |
| 664 if self._utf8_mode: | 645 if self._utf8_mode: |
| 665 opath = walk.WalkDirEntry.alt_u8(opath) | 646 opath = walk.WalkDirEntry.alt_u8(opath) |
| 666 else: | 647 else: |
| 667 opath = walk.WalkDirEntry.alt_fs(opath) | 648 opath = walk.WalkDirEntry.alt_fs(opath) |
| 668 if self._size_only: | 649 if self._size_only: |
| 669 self._outfp.write(format_bsd_line( | 650 self._writer.write_size( |
| 670 "SIZE", None, | |
| 671 util.interpolate_bytes(b"%s/./@/", opath), | 651 util.interpolate_bytes(b"%s/./@/", opath), |
| 672 False, 0)) | 652 0) |
| 673 else: | 653 else: |
| 674 self._outfp.write(format_bsd_line( | 654 self._writer.write_file_digest( |
| 675 self._algorithm[1], | 655 self._algorithm[1], |
| 656 util.interpolate_bytes(b"%s/./@/", opath), | |
| 676 linkdgst.digest(), | 657 linkdgst.digest(), |
| 677 util.interpolate_bytes(b"%s/./@/", opath), | 658 self._use_base64) |
| 678 self._use_base64)) | 659 self._writer.flush() |
| 679 self._outfp.flush() | |
| 680 else: | 660 else: |
| 681 # | 661 # |
| 682 # Follow the symlink to dir or handle a "real" directory | 662 # Follow the symlink to dir or handle a "real" directory |
| 683 # | 663 # |
| 684 | 664 |
| 777 if self._utf8_mode: | 757 if self._utf8_mode: |
| 778 opath = walk.WalkDirEntry.alt_u8(opath) | 758 opath = walk.WalkDirEntry.alt_u8(opath) |
| 779 else: | 759 else: |
| 780 opath = walk.WalkDirEntry.alt_fs(opath) | 760 opath = walk.WalkDirEntry.alt_fs(opath) |
| 781 if self._size_only: | 761 if self._size_only: |
| 782 self._outfp.write(format_bsd_line( | 762 self._writer.write_size( |
| 783 "SIZE", None, | |
| 784 util.interpolate_bytes(b"%s/./@", opath), | 763 util.interpolate_bytes(b"%s/./@", opath), |
| 785 False, 0)) | 764 0) |
| 786 else: | 765 else: |
| 787 self._outfp.write(format_bsd_line( | 766 self._writer.write_file_digest( |
| 788 self._algorithm[1], | 767 self._algorithm[1], |
| 768 util.interpolate_bytes(b"%s/./@", opath), | |
| 789 linkdgst.digest(), | 769 linkdgst.digest(), |
| 790 util.interpolate_bytes(b"%s/./@", opath), | 770 self._use_base64) |
| 791 self._use_base64)) | 771 self._writer.flush() |
| 792 self._outfp.flush() | |
| 793 else: | 772 else: |
| 794 # | 773 # |
| 795 # Follow the symlink to file or handle a "real" file | 774 # Follow the symlink to file or handle a "real" file |
| 796 # | 775 # |
| 797 | 776 |
| 842 if self._utf8_mode: | 821 if self._utf8_mode: |
| 843 opath = walk.WalkDirEntry.alt_u8(opath) | 822 opath = walk.WalkDirEntry.alt_u8(opath) |
| 844 else: | 823 else: |
| 845 opath = walk.WalkDirEntry.alt_fs(opath) | 824 opath = walk.WalkDirEntry.alt_fs(opath) |
| 846 if self._size_only: | 825 if self._size_only: |
| 847 self._outfp.write(format_bsd_line( | 826 self._writer.write_size(opath, fso.stat.st_size) |
| 848 "SIZE", None, opath, False, fso.stat.st_size)) | |
| 849 else: | 827 else: |
| 850 if self._print_size: | 828 sz = fso.stat.st_size if self._print_size else None |
| 851 self._outfp.write(format_bsd_line( | 829 self._writer.write_file_digest( |
| 852 self._algorithm[1], | 830 self._algorithm[1], opath, dgst, |
| 853 dgst, opath, | 831 use_base64=self._use_base64, |
| 854 self._use_base64, | 832 size=sz) |
| 855 fso.stat.st_size)) | 833 self._writer.flush() |
| 856 else: | |
| 857 self._outfp.write(format_bsd_line( | |
| 858 self._algorithm[1], dgst, opath, | |
| 859 self._use_base64)) | |
| 860 self._outfp.flush() | |
| 861 opath = join_output_path(top, None) | 834 opath = join_output_path(top, None) |
| 862 if opath: | 835 if opath: |
| 863 if self._utf8_mode: | 836 if self._utf8_mode: |
| 864 opath = walk.WalkDirEntry.alt_u8(opath) | 837 opath = walk.WalkDirEntry.alt_u8(opath) |
| 865 else: | 838 else: |
| 866 opath = walk.WalkDirEntry.alt_fs(opath) | 839 opath = walk.WalkDirEntry.alt_fs(opath) |
| 867 if self._size_only: | 840 if self._size_only: |
| 868 self._outfp.write(format_bsd_line( | 841 self._writer.write_size(opath, dir_size) |
| 869 "SIZE", None, opath, False, dir_size)) | |
| 870 else: | 842 else: |
| 871 if dir_tainted: | 843 if dir_tainted: |
| 872 # | 844 # |
| 873 # IMPORTANT: Print errors BEFORE the associated digest line. | 845 # IMPORTANT: Print errors BEFORE the associated digest line. |
| 874 # Otherwise the "info" command has a problem. | 846 # Otherwise the "info" command has a problem. |
| 875 # | 847 # |
| 876 self._outfp.write(format_bsd_line( | 848 self._writer.write_error(b"directory is tainted") |
| 877 b"ERROR", None, b"directory is tainted", False, None)) | |
| 878 logging.error("Directory has filename problems: %r", opath) | 849 logging.error("Directory has filename problems: %r", opath) |
| 879 if self._print_size: | 850 sz = dir_size if self._print_size else None |
| 880 self._outfp.write(format_bsd_line( | 851 self._writer.write_file_digest( |
| 881 self._algorithm[1], dir_dgst.digest(), opath, | 852 self._algorithm[1], opath, dir_dgst.digest(), |
| 882 self._use_base64, dir_size)) | 853 use_base64=self._use_base64, size=sz) |
| 883 else: | 854 self._writer.flush() |
| 884 self._outfp.write(format_bsd_line( | |
| 885 self._algorithm[1], dir_dgst.digest(), opath, | |
| 886 self._use_base64)) | |
| 887 self._outfp.flush() | |
| 888 return (dir_dgst.digest(), dir_size) | 855 return (dir_dgst.digest(), dir_size) |
| 889 | 856 |
| 890 | 857 |
| 891 def join_output_path(top, name): | 858 def join_output_path(top, name): |
| 892 if name is None: | 859 if name is None: |
| 969 if not modestr.startswith("0"): | 936 if not modestr.startswith("0"): |
| 970 modestr = "0" + modestr | 937 modestr = "0" + modestr |
| 971 return modestr | 938 return modestr |
| 972 | 939 |
| 973 | 940 |
| 974 def format_bsd_line(what, value, filename, use_base64, size=None): | 941 class TreesumWriter(object): |
| 975 ls = util.b(os.linesep) | 942 |
| 976 if not isinstance(what, bytes): | 943 """Writer to write treesum digest files in a format similar to BSD |
| 977 what = what.encode("ascii") | 944 digest files. |
| 978 if what == b"TIMESTAMP": | 945 |
| 979 assert filename is None | 946 Wraps an output file pointer for a binary file. |
| 980 return util.interpolate_bytes(b"TIMESTAMP = %d%s", value, ls) | 947 |
| 981 if what in (b"FSENCODING", b"ISOTIMESTAMP", b"FLAGS", b"VERSION", | 948 Provides high-level methods to write data lines. |
| 982 b"CRC32"): | 949 |
| 983 assert filename is None | 950 Also holds the current CRC for a block. |
| 984 return util.interpolate_bytes(b"%s = %s%s", what, util.b(value), ls) | 951 |
| 985 assert filename is not None | 952 """ |
| 986 if what in (b"COMMENT", b"ERROR", b"GENERATOR"): | 953 |
| 987 return util.interpolate_bytes( | 954 LS = util.b(os.linesep) |
| 988 b"%s (%s)%s", what, util.b(filename, "utf-8"), ls) | 955 |
| 989 if not isinstance(filename, bytes): | 956 def __init__(self, outfp): |
| 990 filename = util.fsencode(filename) | 957 self._outfp = outfp |
| 991 if what == b"SIZE": | 958 self._reset_crc() |
| 992 return util.interpolate_bytes(b"SIZE (%s) = %d%s", filename, size, ls) | 959 |
| 993 if value is None: | 960 def _reset_crc(self): |
| 994 return util.interpolate_bytes(b"%s (%s)%s", what, filename, ls) | 961 self._crc = crc32() |
| 995 if use_base64: | 962 |
| 996 value = base64.b64encode(value) | 963 def start(self, version): |
| 997 else: | 964 """Begin a new block, reset the current CRC and write the VERSION |
| 998 value = binascii.hexlify(value) | 965 tag. |
| 999 if filename != b"./@/": | 966 |
| 1000 filename = util.normalize_filename(filename, True) | 967 """ |
| 1001 if size is None: | 968 self._reset_crc() |
| 1002 return util.interpolate_bytes( | 969 self.write(b"VERSION = ") |
| 1003 b"%s (%s) = %s%s", what, filename, value, ls) | 970 self.writeln(util.b(version)) |
| 1004 else: | 971 |
| 1005 return util.interpolate_bytes( | 972 def write_comment(self, comment): |
| 1006 b"%s (%s) = %s,%d%s", what, filename, value, size, ls) | 973 self.write(b"COMMENT (") |
| 974 self.write(util.b(comment, "utf-8")) | |
| 975 self.writeln(b")") | |
| 976 | |
| 977 def write_generator(self, generator): | |
| 978 self.write(b"GENERATOR (") | |
| 979 self.write(util.b(generator, "utf-8")) | |
| 980 self.writeln(b")") | |
| 981 | |
| 982 def write_error(self, error): | |
| 983 self.write(b"ERROR (") | |
| 984 self.write(util.b(error, "utf-8")) | |
| 985 self.writeln(b")") | |
| 986 | |
| 987 def write_fsencoding(self, encoding): | |
| 988 self.write(b"FSENCODING = ") | |
| 989 self.writeln(util.b(encoding)) | |
| 990 | |
| 991 def write_flags(self, flags): | |
| 992 self.write(b"FLAGS = ") | |
| 993 if isinstance(flags, (str, bytes, bytearray)): | |
| 994 self.writeln(util.b(flags)) | |
| 995 else: | |
| 996 flags.sort() | |
| 997 self.writeln(util.b(",".join(flags))) | |
| 998 | |
| 999 def write_timestamp(self, ts): | |
| 1000 self.write(b"TIMESTAMP = ") | |
| 1001 self.writeln(util.b(str(ts))) | |
| 1002 | |
| 1003 def write_isotimestamp(self, ts): | |
| 1004 self.write(b"ISOTIMESTAMP = ") | |
| 1005 self.writeln(util.b(ts)) | |
| 1006 | |
| 1007 def write_root(self, root): | |
| 1008 assert isinstance(root, bytes) | |
| 1009 self.write(b"ROOT (") | |
| 1010 self.write(root) | |
| 1011 self.writeln(b")") | |
| 1012 | |
| 1013 def write_size(self, filename, sz): | |
| 1014 assert isinstance(filename, bytes) | |
| 1015 self.write(b"SIZE (") | |
| 1016 self.write(filename) | |
| 1017 self.write(b") = ") | |
| 1018 self.writeln(util.b(str(sz))) | |
| 1019 | |
| 1020 def write_file_digest(self, algorithm, filename, digest, | |
| 1021 use_base64=False, size=None): | |
| 1022 digest = (base64.b64encode(digest) | |
| 1023 if use_base64 | |
| 1024 else binascii.hexlify(digest)) | |
| 1025 if filename != b"./@/": | |
| 1026 filename = util.normalize_filename(filename, True) | |
| 1027 self.write(util.b(algorithm)) | |
| 1028 self.write(b" (") | |
| 1029 self.write(filename) | |
| 1030 self.write(b") = ") | |
| 1031 self.write(digest) | |
| 1032 if size is not None: | |
| 1033 self.write(b",") | |
| 1034 self.writeln(util.b(str(size))) | |
| 1035 else: | |
| 1036 self.writeln(b"") | |
| 1037 | |
| 1038 def finish(self): | |
| 1039 """Finish a block and write the current CRC""" | |
| 1040 crc = self._crc.hexdigest() | |
| 1041 self.write(b"CRC32 = ") | |
| 1042 self.writeln(util.b(crc)) | |
| 1043 | |
| 1044 def writeln(self, line): | |
| 1045 """Write the bytes `line` into the output file and update the CRC | |
| 1046 accordingly. | |
| 1047 | |
| 1048 :param bytes line: The line to write to (without line ending) | |
| 1049 | |
| 1050 """ | |
| 1051 self.write(line) | |
| 1052 self.write(self.LS) | |
| 1053 | |
| 1054 def write(self, data): | |
| 1055 """Write `data` into the output file and update the CRC accordingly. | |
| 1056 | |
| 1057 :param bytes data: The data to write to and to update the CRC with | |
| 1058 | |
| 1059 """ | |
| 1060 if data: | |
| 1061 self._outfp.write(data) | |
| 1062 self._crc.update(data) | |
| 1063 | |
| 1064 def flush(self): | |
| 1065 self._outfp.flush() | |
| 1007 | 1066 |
| 1008 | 1067 |
| 1009 class TreesumReader(object): | 1068 class TreesumReader(object): |
| 1010 | 1069 |
| 1011 """Reader to read and/or verify treesum digest files. | 1070 """Reader to read and/or verify treesum digest files. |
