view tests/test_treesum.py @ 386:f045d46e9f3d

treesum: also collect the CRC checksum when reading .treesum files and test for them
author Franz Glasner <fzglas.hg@dom66.de>
date Sat, 17 May 2025 22:41:22 +0200
parents ea73723be05e
children
line wrap: on
line source

# -*- coding: utf-8 -*-
# :-
# SPDX-FileCopyrightText: © 2025 Franz Glasner
# SPDX-License-Identifier: BSD-3-Clause
# :-
r"""Unit tests for treesum

"""

from __future__ import absolute_import, print_function

import os
import shutil
import sys
import unittest

from _test_setup import (DATADIR, TMPDIR)

import cutils.treesum


class TaggedTreesumTests(unittest.TestCase):
    """Tests for generating and reading "tagged" treesum digest files.

    Most tests generate a digest file for `DATADIR` below `TMPDIR`, print
    its information and read the last treesum block back, comparing the
    collected digest, size and algorithm with fixed expected values.

    """

    def setUp(self):
        """Create the scratch directory `TMPDIR` if it does not exist."""
        if not os.path.isdir(TMPDIR):
            os.mkdir(TMPDIR)

    def tearDown(self):
        """Remove the scratch directory `TMPDIR` with all its contents."""
        if os.path.isdir(TMPDIR):
            shutil.rmtree(TMPDIR)

    def _generate_and_collect(self, test_name, extra_filters, utf8,
                              follow_symlinks=None):
        """Generate a tagged SHA1 treesum of `DATADIR` and read it back.

        The digest file is written to
        ``TMPDIR/<pid>__<test_name>.info``, printed using
        `print_treesum_digestfile_infos` and then parsed using
        `TreesumInfo.collect_last_from_file`.

        :param test_name: name of the calling test; used to build the
                          name of the digest file
        :param extra_filters: list of ``(action, pattern)`` filter tuples
                              that are appended to the filter that
                              excludes `TMPDIR`
        :param utf8: value of the `utf8` generator option
        :param follow_symlinks: optional `FollowSymlinkConfig`; if `None`
                                the option is not given at all and the
                                default of `gen_generate_opts` applies
        :return: the object returned by
                 `TreesumInfo.collect_last_from_file`

        """
        digest_file = os.path.join(
            TMPDIR,
            "%d__%s.info" % (os.getpid(), test_name))
        rel_tmpdir = os.path.relpath(TMPDIR, DATADIR)
        gen_kwargs = dict(
            directories=[DATADIR],
            algorithm="SHA1",
            # always exclude the scratch directory with the digest file
            fnmatch_filters=([("exclude", "path:%s" % (rel_tmpdir,))]
                             + list(extra_filters)),
            generator="full",
            grouping_separator="_",
            output=digest_file,
            output_style="tagged",
            print_size=True,
            utf8=utf8)
        if follow_symlinks is not None:
            gen_kwargs["follow_symlinks"] = follow_symlinks
        gen_opts = cutils.treesum.gen_generate_opts(**gen_kwargs)
        info_opts = cutils.treesum.gen_info_opts(digest_files=[digest_file],
                                                 last=True)
        cutils.treesum.generate_treesum(gen_opts)
        cutils.treesum.print_treesum_digestfile_infos(info_opts)
        return cutils.treesum.TreesumInfo.collect_last_from_file(digest_file)

    def test_gen_and_info_P_utf8(self):
        # `follow_symlinks` not given, utf8=True, *.treesum files excluded
        info = self._generate_and_collect(
            "test_gen_and_info_P_utf8",
            [("exclude", "glob:*.treesum")],
            utf8=True)
        self.assertEqual(
            b"\xcb\x04\x39\x44\x60\x08\xd7\xfa\x0a\x25\xfd\xc6\xbb\x74\x8e\x2c\x12\x13\xf8\x31",    # noqa: E501 line too long
            info.digest)
        self.assertEqual(55, info.size)
        self.assertEqual("SHA1", info.algorithm)

    def test_gen_and_info_P_native(self):
        # `follow_symlinks` not given, utf8=False, *.treesum files excluded
        info = self._generate_and_collect(
            "test_gen_and_info_P_native",
            [("exclude", "glob:*.treesum")],
            utf8=False)
        self.assertEqual(
            b"\xcb\x04\x39\x44\x60\x08\xd7\xfa\x0a\x25\xfd\xc6\xbb\x74\x8e\x2c\x12\x13\xf8\x31",    # noqa: E501 line too long
            info.digest)
        self.assertEqual(55, info.size)
        self.assertEqual("SHA1", info.algorithm)

    def test_gen_and_info_L_utf8(self):
        # all three FollowSymlinkConfig flags enabled, utf8=True,
        # *.treesum files excluded
        info = self._generate_and_collect(
            "test_gen_and_info_L_utf8",
            [("exclude", "glob:*.treesum")],
            utf8=True,
            follow_symlinks=cutils.treesum.FollowSymlinkConfig(
                True, True, True))
        self.assertEqual(
            b"\xfe\x21\x4c\xfa\xdc\xc5\x1f\xad\x63\x19\x74\x95\xf5\xb3\x6d\x32\x45\xec\x73\x3e",    # noqa: E501 line too long
            info.digest)
        self.assertEqual(110, info.size)
        self.assertEqual("SHA1", info.algorithm)

    def test_gen_and_info_L_native(self):
        # all three FollowSymlinkConfig flags enabled, utf8=False,
        # *.treesum files excluded
        info = self._generate_and_collect(
            "test_gen_and_info_L_native",
            [("exclude", "glob:*.treesum")],
            utf8=False,
            follow_symlinks=cutils.treesum.FollowSymlinkConfig(
                True, True, True))
        self.assertEqual(
            b"\xfe\x21\x4c\xfa\xdc\xc5\x1f\xad\x63\x19\x74\x95\xf5\xb3\x6d\x32\x45\xec\x73\x3e",    # noqa: E501 line too long
            info.digest)
        self.assertEqual(110, info.size)
        self.assertEqual("SHA1", info.algorithm)

    def test_gen_and_info_P_utf8_with_treesum(self):
        # no filter for *.treesum files: only TMPDIR is excluded
        info = self._generate_and_collect(
            "test_gen_and_info_P_utf8_with_treesum",
            [],
            utf8=True)
        self.assertEqual(
            b"\x78\xdd\xcd\xd2\xbe\xa5\x2c\x8c\xc9\x1e\x3a\xd4\x26\xda\x35\x2e\xa6\xc2\x0a\x06",    # noqa: E501 line too long
            info.digest)
        self.assertEqual(2620, info.size)
        self.assertEqual("SHA1", info.algorithm)

    def test_gen_and_info_P_utf8_accept_treesum(self):
        # *.treesum files are handled by an "accept-treesum" filter
        info = self._generate_and_collect(
            "test_gen_and_info_P_utf8_accept_treesum",
            [("accept-treesum", "glob:*.treesum")],
            utf8=True)
        self.assertEqual(
            b"\x69\x6f\xe2\x51\xbe\x94\xbe\xcc\x76\xa5\x91\x24\x1d\x46\x83\xbb\x44\x36\xc7\x9b\x5b\x7b\x62\xb3\xe0\x4a\x0e\xdc\x7e\xc4\x07\xcb",    # noqa: E501 line too long
            info.digest)
        # the size is taken from within the .treesum file
        self.assertEqual(67, info.size)
        # the digest algorithm is taken from the .treesum file too
        self.assertEqual("SHA256", info.algorithm)

    def test_comments_in_treesum_file(self):
        # Copy DATADIR/_data.treesum into TMPDIR with comment lines
        # (starting with "#" or ";") inserted at several positions and
        # check that the copy is still read with the expected results.
        src_digest_file = os.path.join(DATADIR, "_data.treesum")
        dst_digest_file = os.path.join(TMPDIR, "_data.treesum")

        # Both files are opened in binary mode: every literal written
        # must be bytes (a native str raises TypeError on Python 3).
        with open(src_digest_file, "rb") as src:
            with open(dst_digest_file, "wb") as dst:
                first = True
                lineno = 0
                while True:
                    line = src.readline(4096)
                    if not line:
                        # write a trailing comment
                        dst.write(b"; this is a trailing comment\r\n")
                        break
                    if first:
                        # write a leading comment
                        dst.write(b"# this is a leading comment\r\n")
                        first = False
                    lineno += 1
                    dst.write(line)
                    if lineno == 1:
                        dst.write(b" ;this is a comment after VERSION\n")
                    elif lineno == 9:
                        dst.write(
                            b"#this is a comment after a digest line\r\n")
        info_opts = cutils.treesum.gen_info_opts(
            digest_files=[dst_digest_file],
            last=True)
        cutils.treesum.print_treesum_digestfile_infos(info_opts)
        info = cutils.treesum.TreesumInfo.collect_last_from_file(
            dst_digest_file)
        self.assertEqual(
            b"\x69\x6f\xe2\x51\xbe\x94\xbe\xcc\x76\xa5\x91\x24\x1d\x46\x83\xbb\x44\x36\xc7\x9b\x5b\x7b\x62\xb3\xe0\x4a\x0e\xdc\x7e\xc4\x07\xcb",    # noqa: E501 line too long
            info.digest)
        # the size is taken from within the .treesum file
        self.assertEqual(67, info.size)
        # the digest algorithm is taken from the .treesum file too
        self.assertEqual("SHA256", info.algorithm)
        self.assertEqual("4C53C26D", info.crc_checksum)


if __name__ == "__main__":
    # Run the tests of this module when executed as a script.
    # buffer=True makes unittest capture stdout/stderr while tests run.
    exit_status = unittest.main(buffer=True)
    sys.exit(exit_status)