changeset 386:f045d46e9f3d

treesum: also collect the CRC checksum when reading .treesum files and test for them
author Franz Glasner <fzglas.hg@dom66.de>
date Sat, 17 May 2025 22:41:22 +0200
parents ea73723be05e
children a90c0b9a06d7
files cutils/treesum.py tests/test_treesum.py
diffstat 2 files changed, 59 insertions(+), 8 deletions(-) [+]
line wrap: on
line diff
--- a/cutils/treesum.py	Sat May 17 16:53:16 2025 +0200
+++ b/cutils/treesum.py	Sat May 17 22:41:22 2025 +0200
@@ -1935,7 +1935,7 @@
 
         with reader:
             root = generator = flags = fsencoding = algorithm = digest \
-                = size = None
+                = size = crc_checksum = None
             errors = set()
             comments = []
             fnmatch_filters = []
@@ -1949,7 +1949,8 @@
                     # start a new block
                     in_block = True
                     block_no += 1
-                    root = flags = algorithm = digest = size = None
+                    root = flags = algorithm = digest = size = \
+                        crc_checksum = None
                     comments = []
                 elif record[0] == "GENERATOR":
                     generator = record[1]
@@ -1970,7 +1971,7 @@
                 elif record[0] == "ACCEPT-TREESUM":
                     pass
                 elif record[0] == "CRC32":
-                    pass
+                    crc_checksum = record[1]
                     # in_block = False
                 else:
                     if not in_block:
@@ -1990,7 +1991,8 @@
                                 block_no,
                                 root, generator, fsencoding, flags,
                                 fnmatch_filters,
-                                comments, errors, algorithm, digest, size)
+                                comments, errors, algorithm, digest, size,
+                                crc_checksum)
                             root = generator = flags = fsencoding = algorithm \
                                 = digest = size = None
                             errors = set()
@@ -2002,14 +2004,16 @@
                     block_handler(
                         block_no,
                         root, generator, fsencoding, flags, fnmatch_filters,
-                        comments, errors, algorithm, digest, size)
+                        comments, errors, algorithm, digest, size,
+                        crc_checksum)
             else:
                 logging.warning("missing block end")
 
 
 def print_block_data(block_no, tag, generator, fsencoding, flags,
                      fnmatch_filters, comments, errors,
-                     algorithm, digest, size):
+                     algorithm, digest, size,
+                     crc_checksum):
     digeststr = util.n(binascii.hexlify(digest)) if digest else "<no digest>"
     sizestr = str(size) if size is not None else "<no size>"
     print("BLOCK No %d:" % (block_no,))
@@ -2042,14 +2046,16 @@
 class TreesumInfo(object):
 
     def __init__(self):
-        self._algorithm = self._digest = self._size = None
+        self._algorithm = self._digest = self._size = self._crc_checksum = None
 
     def __call__(self, block_no, tag, generator, fsencoding, flags,
                  fnmatch_filters, comments, errors,
-                 algorithm, digest, size):
+                 algorithm, digest, size,
+                 crc_checksum):
         self._algorithm = algorithm
         self._digest = digest
         self._size = size
+        self._crc_checksum = crc_checksum    # hex-encoded text, or None
 
     @property
     def algorithm(self):
@@ -2063,6 +2069,12 @@
     def size(self):
         return self._size
 
+    @property
+    def crc_checksum(self):
+        """Upper-cased CRC text; a falsy stored value is returned as is."""
+        stored = self._crc_checksum
+        return stored.upper() if stored else stored
+
     @classmethod
     def collect_last_from_file(cls, digest_file):
         info = cls()
--- a/tests/test_treesum.py	Sat May 17 16:53:16 2025 +0200
+++ b/tests/test_treesum.py	Sat May 17 22:41:22 2025 +0200
@@ -196,6 +196,45 @@
         # accepts uses the digest algorithm from the .treesum file
         self.assertEqual("SHA256", info.algorithm)
 
+    def test_comments_in_treesum_file(self):
+        """Comments in a .treesum file must not change the parsed info."""
+        src_digest_file = os.path.join(DATADIR, "_data.treesum")
+        dst_digest_file = os.path.join(TMPDIR, "_data.treesum")
+
+        # files are binary: str literals would raise TypeError on Python 3
+        with open(src_digest_file, "rb") as src:
+            with open(dst_digest_file, "wb") as dst:
+                lineno = 0
+                while True:
+                    line = src.readline(4096)
+                    if not line:
+                        # write a trailing comment
+                        dst.write(b"; this is a trailing comment\r\n")
+                        break
+                    if lineno == 0:
+                        # write a leading comment
+                        dst.write(b"# this is a leading comment\r\n")
+                    lineno += 1
+                    dst.write(line)
+                    if lineno == 1:
+                        dst.write(b" ;this is a comment after VERSION\n")
+                    elif lineno == 9:
+                        dst.write(
+                            b"#this is a comment after a digest line\r\n")
+        info_opts = cutils.treesum.gen_info_opts(
+            digest_files=[dst_digest_file],
+            last=True)
+        cutils.treesum.print_treesum_digestfile_infos(info_opts)
+        info = cutils.treesum.TreesumInfo.collect_last_from_file(
+            dst_digest_file)
+        self.assertEqual(
+            b"\x69\x6f\xe2\x51\xbe\x94\xbe\xcc\x76\xa5\x91\x24\x1d\x46\x83\xbb\x44\x36\xc7\x9b\x5b\x7b\x62\xb3\xe0\x4a\x0e\xdc\x7e\xc4\x07\xcb",    # noqa: E501 line too long
+            info.digest)
+        # size, algorithm and CRC are the values recorded in the .treesum file
+        self.assertEqual(67, info.size)
+        self.assertEqual("SHA256", info.algorithm)
+        self.assertEqual("4C53C26D", info.crc_checksum)
+
 
 if __name__ == "__main__":
     sys.exit(unittest.main(buffer=True))