view cutils/util/__init__.py @ 173:e081b6ee5570

treesum.py now also runs on Python 3.4: use a workaround for its missing bytes % formatting. No extra module is required for it to run using the SHA and SHA-2 family of digests.
author Franz Glasner <fzglas.hg@dom66.de>
date Fri, 10 Jan 2025 12:46:44 +0100
parents 804a823c63f5
children 6154b8e4ba94
line wrap: on
line source

# -*- coding: utf-8 -*-
# :-
# :Copyright: (c) 2020-2025 Franz Glasner
# :License:   BSD-3-Clause
# :-
r"""Utility package.

"""

# Public names of this package, i.e. what a star-import exports.
__all__ = ["PY2",
           "PY35",
           "normalize_filename",
           "argv2algo",
           "algotag2algotype",
           "get_blake2b",
           "get_blake2b_256",
           "get_blake2s",
           "default_algotag",
           "fsencode",
           "interpolate_bytes",
           ]


import argparse
import hashlib
import os
import sys


# True when running on a Python 2 interpreter
PY2 = sys.version_info < (3,)
# True when running on Python 3.5 or newer
PY35 = sys.version_info >= (3, 5)


def default_algotag():
    """Return the tag of the preferred default digest algorithm.

    The decision is based solely on what :mod:`hashlib` reports as
    available: BLAKE2b-256 is preferred, then SHA256; SHA1 is the last
    resort.

    A possibly installed :mod:`pyblake2` is not taken into account.

    """
    available = hashlib.algorithms_available
    # Ordered by preference: (name known to hashlib, tag to return)
    for hashlib_name, tag in (("blake2b", "BLAKE2b-256"),
                              ("sha256", "SHA256")):
        if hashlib_name in available:
            return tag
    return "SHA1"


def get_blake2b():
    """Return the factory for blake2b digests.

    The implementation in :mod:`hashlib` is used if it exists; otherwise
    :mod:`pyblake2` is imported and its implementation is returned.

    """
    if hasattr(hashlib, "blake2b"):
        return hashlib.blake2b
    import pyblake2
    return pyblake2.blake2b


def get_blake2b_256():
    """Return a factory for blake2b digests with a 32 octets digest size.

    The returned callable takes no arguments.  It is backed by
    :mod:`hashlib` if that provides blake2b and by :mod:`pyblake2`
    otherwise.

    """
    if hasattr(hashlib, "blake2b"):
        provider = hashlib
    else:
        import pyblake2
        provider = pyblake2

    def _get_blake():
        return provider.blake2b(digest_size=32)

    return _get_blake


def get_blake2s():
    """Return the factory for blake2s digests.

    The implementation in :mod:`hashlib` is used if it exists; otherwise
    :mod:`pyblake2` is imported and its implementation is returned.

    """
    if hasattr(hashlib, "blake2s"):
        return hashlib.blake2s
    import pyblake2
    return pyblake2.blake2s


def argv2algo(s):
    """Convert a command line algorithm specifier into a tuple with the
    type/factory of the digest and the algorithm's tag for output purposes.

    :param str s: the specifier from the command line; should include all
                  algorithm tags also (for proper round-tripping)
    :return: the internal digest specification
    :rtype: a tuple (digest_type_or_factory, name_in_output)
    :raises argparse.ArgumentTypeError: for unrecognized algorithms or names
                  and for recognized algorithms that are not available in
                  the running Python

    String comparisons are done case-insensitively. Error messages echo
    the specifier exactly as it was given.

    """
    spec = s.lower()
    # (accepted specifiers, attribute name in hashlib, tag for output)
    hashlib_algos = (
        (("1", "sha1"), "sha1", "SHA1"),
        (("224", "sha224"), "sha224", "SHA224"),
        (("256", "sha256"), "sha256", "SHA256"),
        (("384", "sha384"), "sha384", "SHA384"),
        (("512", "sha512"), "sha512", "SHA512"),
        (("3-224", "sha3-224"), "sha3_224", "SHA3-224"),
        (("3-256", "sha3-256"), "sha3_256", "SHA3-256"),
        (("3-384", "sha3-384"), "sha3_384", "SHA3-384"),
        (("3", "3-512", "sha3-512"), "sha3_512", "SHA3-512"),
        (("md5",), "md5", "MD5"),
    )
    try:
        for names, attrname, tag in hashlib_algos:
            if spec in names:
                # Resolved lazily: older Pythons do not have all of them
                return (getattr(hashlib, attrname), tag)
        if spec in ("blake2b", "blake2b-512", "blake2", "blake2-512"):
            return (get_blake2b(), "BLAKE2b")
        if spec in ("blake2s", "blake2s-256"):
            return (get_blake2s(), "BLAKE2s")
        if spec in ("blake2-256", "blake2b-256"):
            return (get_blake2b_256(), "BLAKE2b-256")
    except (AttributeError, ImportError) as e:
        # The name is known but neither hashlib nor a fallback module
        # provides it: report it the way argparse can handle properly.
        raise argparse.ArgumentTypeError(
            "`{}' is not available in this Python installation: {}".format(
                s, e))
    raise argparse.ArgumentTypeError(
        "`{}' is not a recognized algorithm".format(s))


def algotag2algotype(s):
    """Convert the algorithm specifier in a BSD-style digest file to the
    type/factory of the corresponding algorithm.

    :param str s: the tag (i.e. normalized name) of the algorithm
    :return: the digest type or factory for `s`
    :raises ValueError: on unknown algorithms and on known algorithms
                        that are not available in the running Python

    All string comparisons are case-sensitive.

    """
    # Tag -> attribute name in hashlib
    hashlib_tags = {
        "SHA1": "sha1",
        "SHA224": "sha224",
        "SHA256": "sha256",
        "SHA384": "sha384",
        "SHA512": "sha512",
        "SHA3-224": "sha3_224",
        "SHA3-256": "sha3_256",
        "SHA3-384": "sha3_384",
        "SHA3-512": "sha3_512",
        "MD5": "md5",
    }
    try:
        if s in hashlib_tags:
            # Resolved lazily: older Pythons do not have all of them
            return getattr(hashlib, hashlib_tags[s])
        # The tags without a dash are accepted for compatibility with
        # the output of openssl dgst
        if s in ("BLAKE2b", "BLAKE2b-512", "BLAKE2b512"):
            return get_blake2b()
        if s in ("BLAKE2s", "BLAKE2s-256", "BLAKE2s256"):
            return get_blake2s()
        if s in ("BLAKE2b-256", "BLAKE2b256"):
            return get_blake2b_256()
    except (AttributeError, ImportError) as e:
        # The tag is known but no implementation can be found: report it
        # with the exception type that is documented for this function.
        raise ValueError(
            "algorithm not available: {} ({})".format(s, e))
    raise ValueError("unknown algorithm: {}".format(s))


def normalize_filename(filename, strip_leading_dot_slash=False):
    """Normalize the path separators in `filename`.

    Every backslash is replaced by a forward slash.  The result has the
    same string type as the input.

    :param filename: the name to normalize (a byte string or a text string)
    :param bool strip_leading_dot_slash: if true all leading ``./`` are
                                         removed after the replacement
    :return: the normalized filename

    """
    # Select constants that match the string type of the input
    if isinstance(filename, bytes):
        backslash, slash, dot_slash = b"\\", b"/", b"./"
    else:
        backslash, slash, dot_slash = u"\\", u"/", u"./"
    normalized = filename.replace(backslash, slash)
    if strip_leading_dot_slash:
        while normalized.startswith(dot_slash):
            normalized = normalized[len(dot_slash):]
    return normalized


def fsencode(what):
    """A compatibility wrapper for :func:`os.fsencode`.

    Values of type :class:`bytes` are returned unchanged; everything
    else is handed over to :func:`os.fsencode`.

    """
    return what if isinstance(what, bytes) else os.fsencode(what)


def interpolate_bytes(formatstr, *values):
    """Interpolate byte strings also on Python 3.4.

    :param bytes formatstr: the ``%``-style format
    :param values: params for interpolation: may *not* contain Unicode strings
    :returns: the formatted octets
    :rtype: bytes
    :raises TypeError: if the workaround is needed and `values` contains
                       a Unicode string

    """
    assert isinstance(formatstr, bytes)
    # Python 3.5+ or Python2 know how to interpolate byte strings
    if PY35 or PY2:
        return formatstr % values
    # Only Python 3 before 3.5 gets here: workaround with a Latin-1 dance.
    # Latin-1 maps every octet to the code point with the same number, so
    # decoding before and encoding after the interpolation is lossless.
    tvalues = []
    for v in values:
        if isinstance(v, str):
            # A real exception instead of an `assert`: the check must not
            # vanish with -O because a text value would silently be
            # encoded as Latin-1 below.
            raise TypeError(
                "Unicode strings are not allowed as values"
                " when interpolating bytes")
        if isinstance(v, bytes):
            tvalues.append(v.decode("latin1"))
        else:
            tvalues.append(v)
    return (formatstr.decode("latin1") % tuple(tvalues)).encode("latin1")