diff cutils/util/__init__.py @ 307:64df94bf4659

treesum: Build a little static database of digest sizes, so that older Python versions can read and use treesum files that were produced with newer Python versions and their digest algorithms.
author Franz Glasner <fzglas.hg@dom66.de>
date Fri, 07 Mar 2025 14:22:22 +0100
parents 44e62e36cad4
children 48430941c18c
line wrap: on
line diff
--- a/cutils/util/__init__.py	Wed Mar 05 14:05:38 2025 +0100
+++ b/cutils/util/__init__.py	Fri Mar 07 14:22:22 2025 +0100
@@ -16,6 +16,7 @@
            "normalize_filename",
            "argv2algo",
            "algotag2algotype",
+           "algotag2digest_size",
            "get_blake2b",
            "get_blake2b_256",
            "get_blake2s",
@@ -218,11 +219,17 @@
     :param str s: the tag (i.e. normalized name) or the algorithm
     :return: the digest type or factory for `s`
     :raises ValueError: on unknown and/or unhandled algorithms
+    :raises ImportError: if a module that is required to handle given
+                         specifier `s` is not available (e.g. BLAKE2b on
+                         Python 2)
 
     All string comparisons are case-sensitive.
 
     """
-    if s == "SHA1":
+    # Standard in Python2.7
+    if s == "MD5":
+        return hashlib.md5
+    elif s == "SHA1":
         return hashlib.sha1
     elif s == "SHA224":
         return hashlib.sha224
@@ -232,6 +239,7 @@
         return hashlib.sha384
     elif s == "SHA512":
         return hashlib.sha512
+    # Available in Python 3.6+
     elif s == "SHA3-224":
         return hashlib.sha3_224
     elif s == "SHA3-256":
@@ -240,14 +248,14 @@
         return hashlib.sha3_384
     elif s == "SHA3-512":
         return hashlib.sha3_512
+    # Available in Python 3.6+ or if pyblake2 is installed
     elif s in ("BLAKE2b", "BLAKE2b-512", "BLAKE2b512"):  # compat for openssl
         return get_blake2b()
     elif s in ("BLAKE2s", "BLAKE2s-256", "BLAKE2s256"):  # compat for openssl
         return get_blake2s()
     elif s in ("BLAKE2b-256", "BLAKE2b256"):   # also compat for openssl dgst
         return get_blake2b_256()
-    elif s == "MD5":
-        return hashlib.md5
+    # Vendored in cutils.crcmod
     elif s == "CRC-24":
         return get_crc("crc-24")
     elif s == "CRC-32-ISO":
@@ -270,6 +278,43 @@
         raise ValueError("unknown algorithm: {}".format(s))
 
 
+def algotag2digest_size(s):
+    """Get the `digest_size` in bytes from given algorithm specifier `s`.
+
+    Falls back to a small static database of digest sizes if the
+    algorithm is not available in the running (older) Python version.
+
+    :param str s: the tag (i.e. normalized name) of the algorithm
+    :return: the digest size in bytes
+    :raises ValueError: on unknown and/or unhandled algorithms
+    :raises ImportError: (or AttributeError) if `s` is neither available
+                         here nor listed in the static database
+
+    All string comparisons are case-sensitive.
+
+    """
+    try:
+        return algotag2algotype(s)().digest_size
+    except (ImportError, AttributeError):  # no hashlib.sha3_*: AttributeError
+        sz = {
+            "SHA3-224": 28,
+            "SHA3-256": 32,
+            "SHA3-384": 48,
+            "SHA3-512": 64,
+            "BLAKE2b": 64,
+            "BLAKE2b-512": 64,
+            "BLAKE2b512": 64,
+            "BLAKE2b-256": 32,
+            "BLAKE2b256": 32,
+            "BLAKE2s": 32,
+            "BLAKE2s-256": 32,
+            "BLAKE2s256": 32,
+        }.get(s)
+        if sz is None:
+            raise
+        return sz
+
+
 def normalize_filename(filename, strip_dot_slashes=False):
     if isinstance(filename, bytes):
         filename = filename.replace(b"\\", b"/")