Mercurial > hgrepos > Python > apps > py-cutils
view cutils/util/__init__.py @ 188:2784fdcc99e5
Implement basic parsing of treesum output.
Including CRC32 checks.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Wed, 15 Jan 2025 14:41:36 +0100 |
| parents | f04d4b1c14b3 |
| children | 0f4febf646f5 |
line wrap: on
line source
# -*- coding: utf-8 -*-
# :-
# :Copyright: (c) 2020-2025 Franz Glasner
# :License:   BSD-3-Clause
# :-
r"""Utility package.

Small helpers shared by the command line tools: native/bytes/unicode string
conversion for Python 2 and Python 3, lookup of digest and CRC factories by
command line name or by output tag, filename normalization and byte string
interpolation.

"""

__all__ = ["PY2",
           "PY35",
           "n",
           "b",
           "u",
           "normalize_filename",
           "argv2algo",
           "algotag2algotype",
           "get_blake2b",
           "get_blake2b_256",
           "get_blake2s",
           "default_algotag",
           "fsencode",
           "interpolate_bytes",
           ]


import argparse
import hashlib
import os
import sys


PY2 = sys.version_info[0] < 3
PY35 = sys.version_info[:2] >= (3, 5)


if PY2:

    def n(s, encoding="ascii"):
        """Convert `s` to the native string implementation.

        On Python 2 the native string is a byte string: Unicode strings
        are encoded with `encoding`, everything else is returned unchanged.

        """
        if isinstance(s, unicode):       # noqa: F821 undefined name 'unicode'
            return s.encode(encoding)
        return s

    def b(s, encoding="ascii"):
        """Convert `s` to bytes.

        Unicode strings are encoded with `encoding`, everything else is
        returned unchanged.

        """
        if isinstance(s, unicode):       # noqa: F821 undefined name 'unicode'
            return s.encode(encoding)
        return s

    def u(s, encoding="ascii"):
        """Convert `s` to a unicode string.

        Byte strings are decoded with `encoding`, everything else is
        returned unchanged.

        """
        if isinstance(s, str):
            return s.decode(encoding)
        return s

else:

    def n(s, encoding="ascii"):
        """Convert `s` to the native string implementation.

        On Python 3 the native string is a Unicode string: `bytes` and
        `bytearray` are decoded with `encoding`, everything else is
        returned unchanged.

        """
        if isinstance(s, (bytes, bytearray)):
            return s.decode(encoding)
        return s

    def b(s, encoding="ascii"):
        """Convert `s` to bytes.

        Unicode strings are encoded with `encoding`, everything else is
        returned unchanged.

        """
        if isinstance(s, str):
            return s.encode(encoding)
        return s

    # On Python 3 the native string already is the Unicode string
    u = n


def default_algotag():
    """Determine the "best" default algorithm.

    Depends on availability in :mod:`hashlib`.

    Prefer BLAKE2b-256, SHA256 or SHA1 -- in this order.

    Does not consider :mod:`pyblake2` if it is available eventually.

    :return: the output tag of the chosen algorithm
    :rtype: str

    """
    if "blake2b" in hashlib.algorithms_available:
        return "BLAKE2b-256"
    if "sha256" in hashlib.algorithms_available:
        return "SHA256"
    return "SHA1"


def get_blake2b():
    """Get the factory for blake2b.

    Uses :mod:`hashlib` if it provides `blake2b` and falls back to
    :mod:`pyblake2` otherwise.

    :raises ImportError: if neither implementation is available

    """
    try:
        return hashlib.blake2b
    except AttributeError:
        import pyblake2
        return pyblake2.blake2b


def get_blake2b_256():
    """Get the factory for blake2b-256.

    The returned factory creates BLAKE2b objects with a digest size of
    32 bytes. Like the constructors in :mod:`hashlib` it optionally
    accepts the initial data to hash as its single positional argument.

    :raises ImportError: if no BLAKE2b implementation is available

    """
    # Resolve the implementation once: same fallback as in get_blake2b()
    blake2b = get_blake2b()

    def _get_blake(data=b""):
        return blake2b(data, digest_size=32)

    return _get_blake


def get_blake2s():
    """Get the factory for blake2s.

    Uses :mod:`hashlib` if it provides `blake2s` and falls back to
    :mod:`pyblake2` otherwise.

    :raises ImportError: if neither implementation is available

    """
    try:
        return hashlib.blake2s
    except AttributeError:
        import pyblake2
        return pyblake2.blake2s


def get_crc(name):
    """Get the factory for a CRC.

    :param str name: the name that is handed to
                     :class:`PredefinedCrc` by the returned factory
    :return: a callable without parameters that creates a new
             :class:`PredefinedCrc` for `name`

    """
    # Imported lazily: only needed if a CRC is really requested
    from ..crcmod.predefined import PredefinedCrc

    def _crc_type():
        return PredefinedCrc(name)

    return _crc_type


#
# The single source of truth for all known algorithms.
#
# Every item is a tuple with
#
#   1. the canonical tag as written into the output
#   2. a callable without parameters that yields the digest type/factory;
#      it is called lazily because some algorithms are not available on
#      all Python versions or need optional modules
#   3. the lower-case names accepted on the command line
#   4. additional tags accepted when reading digest files
#
_ALGORITHMS = (
    ("SHA1", lambda: hashlib.sha1, ("1", "sha1"), ()),
    ("SHA224", lambda: hashlib.sha224, ("224", "sha224"), ()),
    ("SHA256", lambda: hashlib.sha256, ("256", "sha256"), ()),
    ("SHA384", lambda: hashlib.sha384, ("384", "sha384"), ()),
    ("SHA512", lambda: hashlib.sha512, ("512", "sha512"), ()),
    ("SHA3-224", lambda: hashlib.sha3_224, ("3-224", "sha3-224"), ()),
    ("SHA3-256", lambda: hashlib.sha3_256, ("3-256", "sha3-256"), ()),
    ("SHA3-384", lambda: hashlib.sha3_384, ("3-384", "sha3-384"), ()),
    ("SHA3-512", lambda: hashlib.sha3_512, ("3", "3-512", "sha3-512"), ()),
    ("BLAKE2b", get_blake2b,
     ("blake2b", "blake2b-512", "blake2", "blake2-512"),
     ("BLAKE2b-512", "BLAKE2b512")),       # compat for openssl
    ("BLAKE2s", get_blake2s,
     ("blake2s", "blake2s-256"),
     ("BLAKE2s-256", "BLAKE2s256")),       # compat for openssl
    ("BLAKE2b-256", get_blake2b_256,
     ("blake2-256", "blake2b-256"),
     ("BLAKE2b256",)),                     # also compat for openssl dgst
    ("MD5", lambda: hashlib.md5, ("md5",), ()),
    ("CRC-24", lambda: get_crc("crc-24"),
     ("crc24", "crc-24", "crc24-openpgp", "crc-24-openpgp"),
     ()),
    ("CRC-32-ISO", lambda: get_crc("crc-32"),
     ("crc32", "crc-32", "crc32-pkzip", "crc-32-pkzip",
      "crc32-iso", "crc-32-iso", "crc32-iso-hdlc", "crc-32-iso-hdlc"),
     ()),
    ("CRC-32-POSIX", lambda: get_crc("posix"),
     ("crc32-posix", "crc-32-posix", "crc32-cksum", "crc-32-cksum",
      "posix"),
     ()),
    ("CRC-64-ISO", lambda: get_crc("crc-64"),
     ("crc64", "crc-64", "crc64-iso", "crc-64-iso"),
     ()),
    ("CRC-64-ISO-2", lambda: get_crc("crc-64-2"),
     ("crc64-2", "crc-64-2", "crc64-iso-2", "crc-64-iso-2",
      "crc64-mcrc64", "crc-64-mcrc64"),
     ()),
    ("CRC-64-ECMA", lambda: get_crc("crc-64-ecma"),
     ("crc64-ecma", "crc-64-ecma"),
     ()),
    ("CRC-64-XZ", lambda: get_crc("crc-64-xz"),
     ("crc64-xz", "crc-64-xz", "crc64-go-ecma", "crc-64-go-ecma"),
     ()),
    ("CRC-64-GO-ISO", lambda: get_crc("crc-64-go"),
     ("crc64-go", "crc-64-go", "crc64-go-iso", "crc-64-go-iso"),
     ()),
    ("CRC-64-REDIS", lambda: get_crc("crc-64-redis"),
     ("crc64-redis", "crc-64-redis"),
     ()),
)


def _build_algo_maps():
    """Build the lookup tables for :func:`argv2algo` and
    :func:`algotag2algotype` from `_ALGORITHMS`.

    :return: a tuple with a dict that maps command line names to
             (getter, tag) and a dict that maps tags to getters

    """
    by_argv = {}
    by_tag = {}
    for tag, getter, argv_names, compat_tags in _ALGORITHMS:
        by_tag[tag] = getter
        for compat_tag in compat_tags:
            by_tag[compat_tag] = getter
        for argv_name in argv_names:
            by_argv[argv_name] = (getter, tag)
    return (by_argv, by_tag)


_ARGV2ALGO, _TAG2GETTER = _build_algo_maps()


def argv2algo(s):
    """Convert a command line algorithm specifier into a tuple with the
    type/factory of the digest and the algorithms tag for output purposes.

    :param str s: the specifier from the command line; should include all
                  algorithm tags also (for proper round-tripping)
    :return: the internal digest specification
    :rtype: a tuple (digest_type_or_factory, name_in_output)
    :raises argparse.ArgumentTypeError: for unrecognized algorithms or names

    String comparisons are done case-insensitively.

    """
    entry = _ARGV2ALGO.get(s.lower())
    if entry is None:
        # Report the specifier exactly as it has been given by the user
        raise argparse.ArgumentTypeError(
            "`{}' is not a recognized algorithm".format(s))
    getter, tag = entry
    return (getter(), tag)


def algotag2algotype(s):
    """Convert the algorithm specifier in a BSD-style digest file to the
    type/factory of the corresponding algorithm.

    :param str s: the tag (i.e. normalized name) or the algorithm
    :return: the digest type or factory for `s`
    :raises ValueError: on unknown and/or unhandled algorithms

    All string comparisons are case-sensitive.

    """
    try:
        getter = _TAG2GETTER.get(s)
    except TypeError:
        # An unhashable `s` can never be a known tag
        getter = None
    if getter is None:
        raise ValueError("unknown algorithm: {}".format(s))
    return getter()


def normalize_filename(filename, strip_leading_dot_slash=False):
    """Normalize a filename to use forward slashes only.

    :param filename: the filename as byte string or as Unicode string
    :param bool strip_leading_dot_slash: if true remove every leading ``./``
                                         after the separators have been
                                         converted
    :return: the normalized filename; it has the same type as `filename`

    """
    if isinstance(filename, bytes):
        backslash, slash, dot_slash = b"\\", b"/", b"./"
    else:
        backslash, slash, dot_slash = u"\\", u"/", u"./"
    filename = filename.replace(backslash, slash)
    if strip_leading_dot_slash:
        while filename.startswith(dot_slash):
            filename = filename[2:]
    return filename


def fsencode(what):
    """A somewhat compatibility function for :func:`os.fsencode`.

    If `what` is of type :class:`bytes` no :func:`os.fsencode` is required.

    :param what: the filename to encode
    :return: `what` unchanged if it is of type :class:`bytes`, the result
             of :func:`os.fsencode` otherwise

    """
    if isinstance(what, bytes):
        return what
    return os.fsencode(what)


def interpolate_bytes(formatstr, *values):
    """Interpolate byte strings also on Python 3.4.

    :param bytes formatstr:
    :param values: params for interpolation: may *not* contain Unicode strings
    :return: the formatted octets
    :rtype: bytes
    :raises TypeError: if the workaround for Python 3 before 3.5 is active
                       and `values` contains a Unicode string

    """
    assert isinstance(formatstr, bytes)
    # Python 3.5+ or Python2 know how to interpolate byte strings
    if PY35 or PY2:
        return formatstr % values
    # Only Python 3 before 3.5 gets here: workaround with a Latin-1 dance.
    # Latin-1 maps every octet to exactly one code point and back.
    tvalues = []
    for v in values:
        if isinstance(v, str):
            # Explicit check instead of an assert: survives `python -O`
            # and mirrors the refusal of the native interpolation
            raise TypeError(
                "Unicode strings are not allowed as interpolation values")
        if isinstance(v, bytes):
            tvalues.append(v.decode("latin1"))
        else:
            tvalues.append(v)
    return (formatstr.decode("latin1") % tuple(tvalues)).encode("latin1")
