Mercurial > hgrepos > Python > apps > py-cutils
view cutils/util/__init__.py @ 177:089c40240061
Add an alternate implementation for generating directory tree digests:
- Do not use something like os.walk() but use os.scandir() directly.
- Recursively generate the subdirectory digests only when needed and in
the right order.
This fixes a bug where the order of subdirectories in the output did not
match the order in which their directory digests were applied.
The new implementation should also make filtering (to be implemented
later) easier.
NOTE: The tree digests of the old and the new implementation are identical.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Sat, 11 Jan 2025 17:41:28 +0100 |
| parents | e081b6ee5570 |
| children | 6154b8e4ba94 |
line wrap: on
line source
# -*- coding: utf-8 -*-
# :-
# :Copyright: (c) 2020-2025 Franz Glasner
# :License:   BSD-3-Clause
# :-
r"""Utility package.

Helpers for selecting digest algorithms, normalizing filenames and
handling byte strings in a way that works on old and new Python versions.

"""

__all__ = ["PY2",
           "PY35",
           "normalize_filename",
           "argv2algo",
           "algotag2algotype",
           "get_blake2b",
           "get_blake2b_256",
           "get_blake2s",
           "default_algotag",
           "fsencode",
           "interpolate_bytes",
           ]


import argparse
import hashlib
import os
import sys


#: `True` when running on Python 2
PY2 = sys.version_info[0] < 3
#: `True` when running on Python 3.5 or later (bytes support ``%``)
PY35 = sys.version_info[:2] >= (3, 5)


def default_algotag():
    """Determine the "best" default algorithm.

    Depend on availability in :mod:`hashlib`.

    Prefer BLAKE2b-256, SHA256 or SHA1 -- in this order.

    Does not consider :mod:`pyblake2` if it is available eventually.

    :return: the algorithm tag of the selected default algorithm
    :rtype: str

    """
    if "blake2b" in hashlib.algorithms_available:
        return "BLAKE2b-256"
    if "sha256" in hashlib.algorithms_available:
        return "SHA256"
    return "SHA1"


def get_blake2b():
    """Get the factory for blake2b.

    Use :mod:`hashlib` if it provides BLAKE2b, fall back to
    :mod:`pyblake2` otherwise.

    :raises ImportError: if neither implementation is available

    """
    try:
        return hashlib.blake2b
    except AttributeError:
        import pyblake2
        return pyblake2.blake2b


def get_blake2b_256():
    """Get the factory for blake2b-256.

    The returned factory accepts optional initial data as its first
    argument -- just like the plain :mod:`hashlib` constructors -- so
    that all factories handed out by this module can be called the
    same way.

    :raises ImportError: if neither :mod:`hashlib` nor :mod:`pyblake2`
                         provide BLAKE2b

    """
    try:
        blake2b = hashlib.blake2b
    except AttributeError:
        import pyblake2
        blake2b = pyblake2.blake2b

    def _get_blake(data=b""):
        # BLAKE2b-256 is BLAKE2b with a digest size of 32 octets
        return blake2b(data, digest_size=32)

    return _get_blake


def get_blake2s():
    """Get the factory for blake2s.

    Use :mod:`hashlib` if it provides BLAKE2s, fall back to
    :mod:`pyblake2` otherwise.

    :raises ImportError: if neither implementation is available

    """
    try:
        return hashlib.blake2s
    except AttributeError:
        import pyblake2
        return pyblake2.blake2s


def argv2algo(s):
    """Convert a command line algorithm specifier into a tuple with the
    type/factory of the digest and the algorithms tag for output purposes.

    :param str s: the specifier from the command line; should include all
                  algorithm tags also (for proper round-tripping)
    :return: the internal digest specification
    :rtype: a tuple (digest_type_or_factory, name_in_output)
    :raises argparse.ArgumentTypeError: for unrecognized algorithms or names
                                        and for algorithms that are not
                                        available in the running Python

    String comparisons are done case-insensitively.

    """
    s = s.lower()
    try:
        if s in ("1", "sha1"):
            return (hashlib.sha1, "SHA1")
        elif s in ("224", "sha224"):
            return (hashlib.sha224, "SHA224")
        elif s in ("256", "sha256"):
            return (hashlib.sha256, "SHA256")
        elif s in ("384", "sha384"):
            return (hashlib.sha384, "SHA384")
        elif s in ("512", "sha512"):
            return (hashlib.sha512, "SHA512")
        elif s in ("3-224", "sha3-224"):
            return (hashlib.sha3_224, "SHA3-224")
        elif s in ("3-256", "sha3-256"):
            return (hashlib.sha3_256, "SHA3-256")
        elif s in ("3-384", "sha3-384"):
            return (hashlib.sha3_384, "SHA3-384")
        elif s in ("3", "3-512", "sha3-512"):
            return (hashlib.sha3_512, "SHA3-512")
        elif s in ("blake2b", "blake2b-512", "blake2", "blake2-512"):
            return (get_blake2b(), "BLAKE2b")
        elif s in ("blake2s", "blake2s-256"):
            return (get_blake2s(), "BLAKE2s")
        elif s in ("blake2-256", "blake2b-256"):
            return (get_blake2b_256(), "BLAKE2b-256")
        elif s == "md5":
            return (hashlib.md5, "MD5")
    except (AttributeError, ImportError):
        # A known name whose implementation is missing (e.g. SHA-3 on old
        # Python versions or BLAKE2 without pyblake2): argparse reports
        # ArgumentTypeError as a usage error but would let AttributeError
        # and ImportError escape as a traceback.
        raise argparse.ArgumentTypeError(
            "`{}' is not available in this Python installation".format(s))
    raise argparse.ArgumentTypeError(
        "`{}' is not a recognized algorithm".format(s))


def algotag2algotype(s):
    """Convert the algorithm specifier in a BSD-style digest file to the
    type/factory of the corresponding algorithm.

    :param str s: the tag (i.e. normalized name) or the algorithm
    :return: the digest type or factory for `s`
    :raises ValueError: on unknown and/or unhandled algorithms; this
                        includes known algorithms that are not available
                        in the running Python

    All string comparisons are case-sensitive.

    """
    try:
        if s == "SHA1":
            return hashlib.sha1
        elif s == "SHA224":
            return hashlib.sha224
        elif s == "SHA256":
            return hashlib.sha256
        elif s == "SHA384":
            return hashlib.sha384
        elif s == "SHA512":
            return hashlib.sha512
        elif s == "SHA3-224":
            return hashlib.sha3_224
        elif s == "SHA3-256":
            return hashlib.sha3_256
        elif s == "SHA3-384":
            return hashlib.sha3_384
        elif s == "SHA3-512":
            return hashlib.sha3_512
        elif s in ("BLAKE2b", "BLAKE2b-512", "BLAKE2b512"):  # compat for openssl
            return get_blake2b()
        elif s in ("BLAKE2s", "BLAKE2s-256", "BLAKE2s256"):  # compat for openssl
            return get_blake2s()
        elif s in ("BLAKE2b-256", "BLAKE2b256"):   # also compat for openssl dgst
            return get_blake2b_256()
        elif s == "MD5":
            return hashlib.md5
    except (AttributeError, ImportError):
        # Known tag but no implementation at hand: report it with the
        # documented exception type instead of leaking lookup errors.
        raise ValueError("algorithm not available: {}".format(s))
    raise ValueError("unknown algorithm: {}".format(s))


def normalize_filename(filename, strip_leading_dot_slash=False):
    """Normalize a filename to use forward slashes only.

    :param filename: the filename to normalize
    :type filename: bytes or str
    :param bool strip_leading_dot_slash: if true remove all leading ``./``
                                         components (after the conversion
                                         of the backslashes)
    :return: the normalized filename; it has the same type as the given
             `filename`

    """
    if isinstance(filename, bytes):
        filename = filename.replace(b"\\", b"/")
        if strip_leading_dot_slash:
            while filename.startswith(b"./"):
                filename = filename[2:]
    else:
        filename = filename.replace(u"\\", u"/")
        if strip_leading_dot_slash:
            while filename.startswith(u"./"):
                filename = filename[2:]
    return filename


def fsencode(what):
    """A somewhat compatibility function for :func:`os.fsencode`.

    If `what` is of type :class:`bytes` no :func:`os.fsencode` is required.

    :param what: the filename to encode
    :return: `what` unchanged if it is :class:`bytes` already, the result
             of :func:`os.fsencode` otherwise
    :rtype: bytes

    """
    if isinstance(what, bytes):
        return what
    return os.fsencode(what)


def interpolate_bytes(formatstr, *values):
    """Interpolate byte strings also on Python 3.4.

    :param bytes formatstr: the ``%``-style format
    :param values: params for interpolation: may *not* contain Unicode strings
    :return: the formatted octet
    :rtype: bytes
    :raises TypeError: if `formatstr` is not :class:`bytes` or -- on the
                       Python 3.4 fallback path -- if `values` contains
                       Unicode strings

    """
    # No `assert` here: asserts vanish with -O
    if not isinstance(formatstr, bytes):
        raise TypeError("`formatstr' must be of type bytes")
    # Python 3.5+ or Python2 know how to interpolate byte strings
    if PY35 or PY2:
        return formatstr % values
    # Only Python 3.0 - 3.4 gets here: workaround with a Latin-1 dance
    # (Latin-1 maps every octet to exactly one code point and back).
    tvalues = []
    for v in values:
        if isinstance(v, str):
            # Same exception type as native bytes interpolation raises
            raise TypeError(
                "Unicode strings are not allowed when interpolating bytes")
        if isinstance(v, bytes):
            tvalues.append(v.decode("latin1"))
        else:
            tvalues.append(v)
    return (formatstr.decode("latin1") % tuple(tvalues)).encode("latin1")
