changeset 118:12339ac2148d

Move some functions into cutils.util (i.e. algorithms and their aliases)
author Franz Glasner <fzglas.hg@dom66.de>
date Sun, 29 Dec 2024 18:22:22 +0100
parents e51f34ad6d71
children dd4fe912d7e9
files cutils/shasum.py cutils/util/__init__.py
diffstat 2 files changed, 151 insertions(+), 139 deletions(-) [+]
line wrap: on
line diff
--- a/cutils/shasum.py	Sun Dec 29 17:39:00 2024 +0100
+++ b/cutils/shasum.py	Sun Dec 29 18:22:22 2024 +0100
@@ -17,7 +17,6 @@
 import base64
 import binascii
 import errno
-import hashlib
 import io
 try:
     import mmap
@@ -28,8 +27,9 @@
 import stat
 import sys
 
+from . import (__version__, __revision__)
+from . import util
 from .util import constants
-from . import (__version__, __revision__)
 
 
 def main(argv=None):
@@ -37,7 +37,7 @@
         description="Python implementation of shasum",
         fromfile_prefix_chars='@')
     aparser.add_argument(
-        "--algorithm", "-a", action="store", type=argv2algo,
+        "--algorithm", "-a", action="store", type=util.argv2algo,
         help="1 (default), 224, 256, 384, 512, 3-224, 3-256, 3-384, 3-512, blake2b, blake2s, blake2, blake2-256, md5")
     aparser.add_argument(
         "--base64", action="store_true",
@@ -109,7 +109,7 @@
         sys.exit(64)   # :manpage:`sysexits(3)`  EX_USAGE
 
     if not opts.algorithm:
-        opts.algorithm = argv2algo("1")
+        opts.algorithm = util.argv2algo("1")
 
     opts.dest = None
 
@@ -125,7 +125,7 @@
     if checklist and check:
         raise ValueError("only one of `checklist' or `check' is allowed")
     opts = argparse.Namespace(files=files,
-                              algorithm=(algotag2algotype(algorithm),
+                              algorithm=(util.algotag2algotype(algorithm),
                                          algorithm),
                               bsd=bsd,
                               checklist=checklist,
@@ -346,7 +346,7 @@
         filenames = ("-", "stdin", "", )
     else:
         filenames = (
-            normalize_filename(filename, strip_leading_dot_slash=True),)
+            util.normalize_filename(filename, strip_leading_dot_slash=True),)
     with io.open(checklist, "rt", encoding="utf-8") as clf:
         for checkline in clf:
             if not checkline:
@@ -358,7 +358,7 @@
             if parts[0] in ("SIZE", "TIMESTAMP"):
                 assert opts.allow_distinfo
                 continue
-            fn = normalize_filename(parts[2], strip_leading_dot_slash=True)
+            fn = util.normalize_filename(parts[2], strip_leading_dot_slash=True)
             if fn in filenames:
                 return parts
         else:
@@ -389,7 +389,7 @@
             if mo.group(1) == "SIZE":
                 return ("SIZE", None, None, mo.group(3))
         return (mo.group(1),
-                algotag2algotype(mo.group(1)),
+                util.algotag2algotype(mo.group(1)),
                 mo.group(2),
                 mo.group(3))
     else:
@@ -410,126 +410,6 @@
             return None
 
 
-def get_blake2b():
-    """Get the factory for blake2b"""
-    try:
-        return hashlib.blake2b
-    except AttributeError:
-        import pyblake2
-        return pyblake2.blake2b
-
-
-def get_blake2s():
-    """Get the factory for blake2s"""
-    try:
-        return hashlib.blake2s
-    except AttributeError:
-        import pyblake2
-        return pyblake2.blake2s
-
-
-def get_blake2_256():
-    """Get the factory for blake2-256"""
-
-    try:
-        hashlib.blake2b
-    except AttributeError:
-        import pyblake2
-
-        def _get_blake():
-            return pyblake2.blake2b(digest_size=32)
-
-    else:
-
-        def _get_blake():
-            return hashlib.blake2b(digest_size=32)
-
-    return _get_blake
-
-
-def argv2algo(s):
-    """Convert a command line algorithm specifier into a tuple with the
-    type/factory of the digest and the algorithms tag for output purposes.
-
-    :param str s: the specifier from the commane line
-    :return: the internal digest specification
-    :rtype: a tuple (digest_type_or_factory, name_in_output)
-
-    String comparisons are done case-insensitively.
-
-    """
-    s = s.lower()
-    if s in ("1", "sha1"):
-        return (hashlib.sha1, "SHA1")
-    elif s in ("224", "sha224"):
-        return (hashlib.sha224, "SHA224")
-    elif s in ("256", "sha256"):
-        return (hashlib.sha256, "SHA256")
-    elif s in ("384", "sha384"):
-        return (hashlib.sha384, "SHA384")
-    elif s in ("512", "sha512"):
-        return (hashlib.sha512, "SHA512")
-    elif s in ("3-224", "sha3-224"):
-        return (hashlib.sha3_224, "SHA3-224")
-    elif s in ("3-256", "sha3-256"):
-        return (hashlib.sha3_256, "SHA3-256")
-    elif s in ("3-384", "sha3-384"):
-        return (hashlib.sha3_384, "SHA3-384")
-    elif s in ("3-512", "sha3-512"):
-        return (hashlib.sha3_512, "SHA3-512")
-    elif s in ("blake2b", "blake2b-512", "blake2", "blake2-512"):
-        return (get_blake2b(), "BLAKE2b")
-    elif s in ("blake2s", "blake2s-256"):
-        return (get_blake2s(), "BLAKE2s")
-    elif s in ("blake2-256", "blake2b-256"):
-        return (get_blake2_256(), "BLAKE2b-256")
-    elif s == "md5":
-        return (hashlib.md5, "MD5")
-    else:
-        raise argparse.ArgumentTypeError(
-            "`{}' is not a recognized algorithm".format(s))
-
-
-def algotag2algotype(s):
-    """Convert the algorithm specifier in a BSD-style digest file to the
-    type/factory of the corresponding algorithm.
-
-    :param str s: the tag (i.e. normalized name) or the algorithm
-    :return: the digest type or factory for `s`
-
-    All string comparisons are case-sensitive.
-
-    """
-    if s == "SHA1":
-        return hashlib.sha1
-    elif s == "SHA224":
-        return hashlib.sha224
-    elif s == "SHA256":
-        return hashlib.sha256
-    elif s == "SHA384":
-        return hashlib.sha384
-    elif s == "SHA512":
-        return hashlib.sha512
-    elif s == "SHA3-224":
-        return hashlib.sha3_224
-    elif s == "SHA3-256":
-        return hashlib.sha3_256
-    elif s == "SHA3-384":
-        return hashlib.sha3_384
-    elif s == "SHA3-512":
-        return hashlib.sha3_512
-    elif s in ("BLAKE2b", "BLAKE2b-512", "BLAKE2b512"):  # compat for openssl
-        return get_blake2b()
-    elif s in ("BLAKE2s", "BLAKE2s-256", "BLAKE2s256"):  # compat for openssl
-        return get_blake2s()
-    elif s in ("BLAKE2b-256", "BLAKE2b256"):   # also compat for openssl dgst
-        return get_blake2_256()
-    elif s == "MD5":
-        return hashlib.md5
-    else:
-        raise ValueError("unknown algorithm: {}".format(s))
-
-
 def out_bsd(dest, digest, filename, digestname, binary, use_base64):
     """BSD format output, also :command:`openssl dgst` and
     :command:`b2sum --tag" format output
@@ -543,7 +423,7 @@
         print(digest, file=dest)
     else:
         print("{} ({}) = {}".format(digestname,
-                                    normalize_filename(filename),
+                                    util.normalize_filename(filename),
                                     digest),
               file=dest)
 
@@ -559,7 +439,7 @@
     print("{} {}{}".format(
                 digest,
                 '*' if binary else ' ',
-                '-' if filename is None else normalize_filename(filename)),
+                '-' if filename is None else util.normalize_filename(filename)),
           file=dest)
 
 
@@ -711,13 +591,5 @@
     return h.digest()
 
 
-def normalize_filename(filename, strip_leading_dot_slash=False):
-    filename = filename.replace("\\", "/")
-    if strip_leading_dot_slash:
-        while filename.startswith("./"):
-            filename = filename[2:]
-    return filename
-
-
 if __name__ == "__main__":
     sys.exit(main())
--- a/cutils/util/__init__.py	Sun Dec 29 17:39:00 2024 +0100
+++ b/cutils/util/__init__.py	Sun Dec 29 18:22:22 2024 +0100
@@ -7,4 +7,144 @@
 
 """
 
-__all__ = []
+__all__ = ["normalize_filename",
+           "argv2algo",
+           "algotag2algotype",
+           "get_blake2b",
+           "get_blake2b_256",
+           "get_blake2s",
+           ]
+
+
+import argparse
+import hashlib
+
+
+def get_blake2b():
+    """Get the factory for blake2b"""
+    try:
+        return hashlib.blake2b
+    except AttributeError:
+        import pyblake2
+        return pyblake2.blake2b
+
+
+def get_blake2b_256():
+    """Get the factory for blake2b-256"""
+
+    try:
+        hashlib.blake2b
+    except AttributeError:
+        import pyblake2
+
+        def _get_blake():
+            return pyblake2.blake2b(digest_size=32)
+
+    else:
+
+        def _get_blake():
+            return hashlib.blake2b(digest_size=32)
+
+    return _get_blake
+
+
+def get_blake2s():
+    """Get the factory for blake2s"""
+    try:
+        return hashlib.blake2s
+    except AttributeError:
+        import pyblake2
+        return pyblake2.blake2s
+
+
+def argv2algo(s):
+    """Convert a command line algorithm specifier into a tuple with the
+    type/factory of the digest and the algorithms tag for output purposes.
+
+    :param str s: the specifier from the command line
+    :return: the internal digest specification
+    :rtype: a tuple (digest_type_or_factory, name_in_output)
+    :raises argparse.ArgumentTypeError: for unrecognized algorithms or names
+
+    String comparisons are done case-insensitively.
+
+    """
+    s = s.lower()
+    if s in ("1", "sha1"):
+        return (hashlib.sha1, "SHA1")
+    elif s in ("224", "sha224"):
+        return (hashlib.sha224, "SHA224")
+    elif s in ("256", "sha256"):
+        return (hashlib.sha256, "SHA256")
+    elif s in ("384", "sha384"):
+        return (hashlib.sha384, "SHA384")
+    elif s in ("512", "sha512"):
+        return (hashlib.sha512, "SHA512")
+    elif s in ("3-224", "sha3-224"):
+        return (hashlib.sha3_224, "SHA3-224")
+    elif s in ("3-256", "sha3-256"):
+        return (hashlib.sha3_256, "SHA3-256")
+    elif s in ("3-384", "sha3-384"):
+        return (hashlib.sha3_384, "SHA3-384")
+    elif s in ("3-512", "sha3-512"):
+        return (hashlib.sha3_512, "SHA3-512")
+    elif s in ("blake2b", "blake2b-512", "blake2", "blake2-512"):
+        return (get_blake2b(), "BLAKE2b")
+    elif s in ("blake2s", "blake2s-256"):
+        return (get_blake2s(), "BLAKE2s")
+    elif s in ("blake2-256", "blake2b-256"):
+        return (get_blake2b_256(), "BLAKE2b-256")
+    elif s == "md5":
+        return (hashlib.md5, "MD5")
+    else:
+        raise argparse.ArgumentTypeError(
+            "`{}' is not a recognized algorithm".format(s))
+
+
+def algotag2algotype(s):
+    """Convert the algorithm specifier in a BSD-style digest file to the
+    type/factory of the corresponding algorithm.
+
+    :param str s: the tag (i.e. normalized name) of the algorithm
+    :return: the digest type or factory for `s`
+    :raises ValueError: on unknown and/or unhandled algorithms
+
+    All string comparisons are case-sensitive.
+
+    """
+    if s == "SHA1":
+        return hashlib.sha1
+    elif s == "SHA224":
+        return hashlib.sha224
+    elif s == "SHA256":
+        return hashlib.sha256
+    elif s == "SHA384":
+        return hashlib.sha384
+    elif s == "SHA512":
+        return hashlib.sha512
+    elif s == "SHA3-224":
+        return hashlib.sha3_224
+    elif s == "SHA3-256":
+        return hashlib.sha3_256
+    elif s == "SHA3-384":
+        return hashlib.sha3_384
+    elif s == "SHA3-512":
+        return hashlib.sha3_512
+    elif s in ("BLAKE2b", "BLAKE2b-512", "BLAKE2b512"):  # compat for openssl
+        return get_blake2b()
+    elif s in ("BLAKE2s", "BLAKE2s-256", "BLAKE2s256"):  # compat for openssl
+        return get_blake2s()
+    elif s in ("BLAKE2b-256", "BLAKE2b256"):   # also compat for openssl dgst
+        return get_blake2b_256()
+    elif s == "MD5":
+        return hashlib.md5
+    else:
+        raise ValueError("unknown algorithm: {}".format(s))
+
+
+def normalize_filename(filename, strip_leading_dot_slash=False):
+    filename = filename.replace("\\", "/")
+    if strip_leading_dot_slash:
+        while filename.startswith("./"):
+            filename = filename[2:]
+    return filename