Mercurial > hgrepos > Python > apps > py-cutils
view cutils/util/__init__.py @ 380:58552d3d1766
treesum: begin unittests for treesums and .treesum files
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 12 May 2025 15:33:16 +0200 |
| parents | 6d7659a709f2 |
| children |
line wrap: on
line source
# -*- coding: utf-8 -*-
# :-
# SPDX-FileCopyrightText: © 2020-2025 Franz Glasner
# SPDX-License-Identifier: BSD-3-Clause
# :-
r"""Utility package.

"""

from __future__ import print_function, absolute_import


__all__ = ["PY2",
           "PY35",
           "n", "b", "u",
           "parse_grouped_decimal_number",
           "normalize_filename",
           "escape_for_output",
           "argv2algo",
           "algotag2algotype",
           "algotag2digest_size",
           "get_blake2b",
           "get_blake2b_256",
           "get_blake2s",
           "default_algotag",
           "fsencode",
           "interpolate_bytes",
           ]


import argparse
import hashlib
import os
import sys


PY2 = sys.version_info[0] < 3
PY35 = sys.version_info[:2] >= (3, 5)


# Translation table for the Python 3 flavour of
# :func:`parse_grouped_decimal_number`: maps every accepted grouping
# character to `None`, i.e. deletes it.
NORMALIZATION_DELETE_CHARS = {
    ord('.'): None,
    ord(','): None,
    ord(' '): None,
    ord("'"): None,
    ord('_'): None
}

# Digest sizes (in bytes) for algorithm tags whose implementation may be
# missing from :mod:`hashlib` on older Python versions.
# Used by :func:`algotag2digest_size` as a last resort.
_FALLBACK_DIGEST_SIZES = {
    "SHA3-224": 28,
    "SHA3-256": 32,
    "SHA3-384": 48,
    "SHA3-512": 64,
    "BLAKE2b": 64,
    "BLAKE2b-512": 64,
    "BLAKE2b512": 64,
    "BLAKE2b-256": 32,
    "BLAKE2b256": 32,
    "BLAKE2s": 32,
    "BLAKE2s-256": 32,
    "BLAKE2s256": 32,
}


if PY2:

    def n(s, encoding="ascii"):
        """Convert `s` to the native string implementation"""
        if isinstance(s, unicode):       # noqa: F821 undefined name 'unicode'
            return s.encode(encoding)
        return s

    def b(s, encoding="ascii", errors="strict"):
        """Convert `s` to bytes"""
        if isinstance(s, unicode):       # noqa: F821 undefined name 'unicode'
            return s.encode(encoding, errors)
        return s

    def u(s, encoding="ascii"):
        """Convert `s` to a unicode string"""
        if isinstance(s, str):
            return s.decode(encoding)
        return s

    def parse_grouped_decimal_number(s):
        """Parse a decimal integer that may contain grouping characters.

        The characters ``.``, ``,``, space, ``'`` and ``_`` are removed
        before the remainder is converted with ``int(..., 10)``.

        :raises ValueError: if the remainder is not a decimal integer

        """
        return int(n(s).translate(None, "., '_"), 10)

else:

    def n(s, encoding="ascii"):
        """Convert `s` to the native string implementation"""
        if isinstance(s, (bytes, bytearray)):
            return s.decode(encoding)
        return s

    def b(s, encoding="ascii", errors="strict"):
        """Convert `s` to bytes"""
        if isinstance(s, str):
            return s.encode(encoding, errors)
        return s

    # The native string type *is* the unicode string type on Python 3
    u = n

    def parse_grouped_decimal_number(s):
        """Parse a decimal integer that may contain grouping characters.

        The characters ``.``, ``,``, space, ``'`` and ``_`` are removed
        before the remainder is converted with ``int(..., 10)``.

        :raises ValueError: if the remainder is not a decimal integer

        """
        return int(n(s).translate(NORMALIZATION_DELETE_CHARS), 10)


def escape_for_output(what):
    """Escape `what` in such a way that the output can be safely written
    into a line and/or column-oriented output file.

    Backslashes are doubled; LF, CR and TAB are replaced by the literal
    four-character sequences ``\\x0a``, ``\\x0d`` and ``\\x09``.

    :param what: a byte string or a text string
    :return: the escaped value; it has the same type as `what`

    """
    # The backslash must be handled first: otherwise the backslashes
    # introduced by the other replacements would be doubled again.
    if isinstance(what, bytes):
        return (what.replace(b'\\', b"\\\\")
                    .replace(b'\n', b"\\x0a")
                    .replace(b'\r', b"\\x0d")
                    .replace(b'\t', b"\\x09"))
    else:
        return (what.replace(u'\\', u"\\\\")
                    .replace(u'\n', u"\\x0a")
                    .replace(u'\r', u"\\x0d")
                    .replace(u'\t', u"\\x09"))


def default_algotag():
    """Determine the "best" default algorithm.

    Depend on availability in :mod:`hashlib`.

    Prefer BLAKE2b-256, SHA256 or SHA1 -- in this order.

    Does not consider :mod:`pyblake2` if it is available eventually.

    :return: the algorithm tag of the chosen default
    :rtype: str

    """
    # Python <2.7.9 has no algorithms_available: fall back to algorithms
    try:
        algos = hashlib.algorithms_available
    except AttributeError:
        try:
            algos = hashlib.algorithms
        except AttributeError:
            algos = []
    if "blake2b" in algos:
        return "BLAKE2b-256"
    if "sha256" in algos:
        return "SHA256"
    return "SHA1"


def get_blake2b():
    """Get the factory for blake2b

    :raises ImportError: if neither :mod:`hashlib` nor :mod:`pyblake2`
                         provide the algorithm

    """
    try:
        return hashlib.blake2b
    except AttributeError:
        import pyblake2
        return pyblake2.blake2b


def get_blake2b_256():
    """Get the factory for blake2b-256

    The returned callable takes no arguments and yields a new BLAKE2b
    object with a digest size of 32 bytes.

    :raises ImportError: if neither :mod:`hashlib` nor :mod:`pyblake2`
                         provide the algorithm

    """
    try:
        hashlib.blake2b
    except AttributeError:
        import pyblake2

        def _get_blake():
            return pyblake2.blake2b(digest_size=32)

    else:

        def _get_blake():
            return hashlib.blake2b(digest_size=32)

    return _get_blake


def get_blake2s():
    """Get the factory for blake2s

    :raises ImportError: if neither :mod:`hashlib` nor :mod:`pyblake2`
                         provide the algorithm

    """
    try:
        return hashlib.blake2s
    except AttributeError:
        import pyblake2
        return pyblake2.blake2s


def get_crc(name):
    """Get the factory for a CRC

    :param str name: the name of a predefined CRC as expected by
                     the vendored `PredefinedCrc`
    :return: a callable without arguments that creates a new
             `PredefinedCrc` for `name`

    """
    # Imported lazily: only needed if a CRC is really requested
    from ..crcmod.predefined import PredefinedCrc

    def _crc_type():
        return PredefinedCrc(name)

    return _crc_type


def argv2algo(s):
    """Convert a command line algorithm specifier into a tuple with the
    type/factory of the digest and the algorithms tag for output purposes.

    :param str s: the specifier from the command line; should include all
                  algorithm tags also (for proper round-tripping)
    :return: the internal digest specification
    :rtype: a tuple (digest_type_or_factory, name_in_output)
    :raises argparse.ArgumentTypeError: for unrecognized algorithms or names

    String comparisons are done case-insensitively.

    """
    # NOTE: Keep this an if/elif chain: the attributes of hashlib must
    #       only be touched for the branch that really matches because
    #       not every Python version provides all of them.
    s = s.lower()
    if s in ("1", "sha1"):
        return (hashlib.sha1, "SHA1")
    elif s in ("224", "sha224"):
        return (hashlib.sha224, "SHA224")
    elif s in ("256", "sha256"):
        return (hashlib.sha256, "SHA256")
    elif s in ("384", "sha384"):
        return (hashlib.sha384, "SHA384")
    elif s in ("512", "sha512"):
        return (hashlib.sha512, "SHA512")
    elif s in ("3-224", "sha3-224"):
        return (hashlib.sha3_224, "SHA3-224")
    elif s in ("3-256", "sha3-256"):
        return (hashlib.sha3_256, "SHA3-256")
    elif s in ("3-384", "sha3-384"):
        return (hashlib.sha3_384, "SHA3-384")
    elif s in ("3", "3-512", "sha3-512"):
        return (hashlib.sha3_512, "SHA3-512")
    elif s in ("blake2b", "blake2b-512", "blake2", "blake2-512"):
        return (get_blake2b(), "BLAKE2b")
    elif s in ("blake2s", "blake2s-256"):
        return (get_blake2s(), "BLAKE2s")
    elif s in ("blake2-256", "blake2b-256"):
        return (get_blake2b_256(), "BLAKE2b-256")
    elif s == "md5":
        return (hashlib.md5, "MD5")
    elif s in ("crc24", "crc-24", "crc24-openpgp", "crc-24-openpgp"):
        return (get_crc("crc-24"), "CRC-24")
    elif s in ("crc32", "crc-32", "crc32-pkzip", "crc-32-pkzip",
               "crc32-iso", "crc-32-iso",
               "crc32-iso-hdlc", "crc-32-iso-hdlc"):
        return (get_crc("crc-32"), "CRC-32-ISO")
    elif s in ("crc32-posix", "crc-32-posix",
               "crc32-cksum", "crc-32-cksum",
               "posix"):
        return (get_crc("posix"), "CRC-32-POSIX")
    elif s in ("crc64", "crc-64", "crc64-iso", "crc-64-iso"):
        return (get_crc("crc-64"), "CRC-64-ISO")
    elif s in ("crc64-2", "crc-64-2", "crc64-iso-2", "crc-64-iso-2",
               "crc64-mcrc64", "crc-64-mcrc64"):
        return (get_crc("crc-64-2"), "CRC-64-ISO-2")
    elif s in ("crc64-ecma", "crc-64-ecma"):
        return (get_crc("crc-64-ecma"), "CRC-64-ECMA")
    elif s in ("crc64-xz", "crc-64-xz", "crc64-go-ecma", "crc-64-go-ecma"):
        return (get_crc("crc-64-xz"), "CRC-64-XZ")
    elif s in ("crc64-go", "crc-64-go", "crc64-go-iso", "crc-64-go-iso"):
        return (get_crc("crc-64-go"), "CRC-64-GO-ISO")
    elif s in ("crc64-redis", "crc-64-redis"):
        return (get_crc("crc-64-redis"), "CRC-64-REDIS")
    else:
        raise argparse.ArgumentTypeError(
            "`{}' is not a recognized algorithm".format(s))


def algotag2algotype(s):
    """Convert the algorithm specifier in a BSD-style digest file to the
    type/factory of the corresponding algorithm.

    :param str s: the tag (i.e. normalized name) or the algorithm
    :return: the digest type or factory for `s`
    :raises ValueError: on unknown and/or unhandled algorithms
    :raises ImportError: if a module that is required to handle given
                         specifier `s` is not available (e.g. BLAKE2b on
                         Python 2)
    :raises AttributeError: if :mod:`hashlib` does not provide the
                            requested constructor (e.g. SHA3 on a Python
                            without it)

    All string comparisons are case-sensitive.

    """
    # Standard in Python2.7
    if s == "MD5":
        return hashlib.md5
    elif s == "SHA1":
        return hashlib.sha1
    elif s == "SHA224":
        return hashlib.sha224
    elif s == "SHA256":
        return hashlib.sha256
    elif s == "SHA384":
        return hashlib.sha384
    elif s == "SHA512":
        return hashlib.sha512
    # Available in Python 3.6+
    elif s == "SHA3-224":
        return hashlib.sha3_224
    elif s == "SHA3-256":
        return hashlib.sha3_256
    elif s == "SHA3-384":
        return hashlib.sha3_384
    elif s == "SHA3-512":
        return hashlib.sha3_512
    # Available in Python 3.6+ or if pyblake2 is installed
    elif s in ("BLAKE2b", "BLAKE2b-512", "BLAKE2b512"):  # compat for openssl
        return get_blake2b()
    elif s in ("BLAKE2s", "BLAKE2s-256", "BLAKE2s256"):  # compat for openssl
        return get_blake2s()
    elif s in ("BLAKE2b-256", "BLAKE2b256"):   # also compat for openssl dgst
        return get_blake2b_256()
    # Vendored in cutils.crcmod
    elif s == "CRC-24":
        return get_crc("crc-24")
    elif s == "CRC-32-ISO":
        return get_crc("crc-32")
    elif s == "CRC-32-POSIX":
        return get_crc("posix")
    elif s == "CRC-64-ISO":
        return get_crc("crc-64")
    elif s == "CRC-64-ISO-2":
        return get_crc("crc-64-2")
    elif s == "CRC-64-ECMA":
        return get_crc("crc-64-ecma")
    elif s == "CRC-64-XZ":
        return get_crc("crc-64-xz")
    elif s == "CRC-64-GO-ISO":
        return get_crc("crc-64-go")
    elif s == "CRC-64-REDIS":
        return get_crc("crc-64-redis")
    else:
        raise ValueError("unknown algorithm: {}".format(s))


def algotag2digest_size(s):
    """Get the `digest_size` in bytes from given algorithm specifier `s`.

    Contains a small static database of digest sizes for algorithms that
    are not available by default in older Python versions.

    :param str s: the algorithm tag as accepted by
                  :func:`algotag2algotype`
    :return: the size of a digest in bytes
    :rtype: int
    :raises ValueError: on unknown and/or unhandled algorithms
    :raises ImportError: if a module that is required to handle given
                         specifier `s` is not available and `s` is not
                         in the static database
    :raises AttributeError: if the implementation lacks a required
                            attribute and `s` is not in the static
                            database

    All string comparisons are case-sensitive.

    """
    try:
        return algotag2algotype(s)().digest_size
    except (ImportError, AttributeError):
        # ImportError: BLAKE2 is neither in hashlib nor is pyblake2
        #              installed.
        # AttributeError: hashlib has no constructor for the algorithm
        #                 (e.g. hashlib.sha3_256 on older Pythons).
        # In both cases the size may be known nevertheless.
        sz = _FALLBACK_DIGEST_SIZES.get(s, None)
        if not sz:
            raise
        return sz


def normalize_filename(filename, strip_dot_slashes=False):
    """Normalize the path separators within `filename`.

    Every backslash is replaced by a forward slash.

    :param filename: the filename as byte string or text string
    :param bool strip_dot_slashes: if true then also remove all leading
                                   ``./``, reduce every inner ``/./`` to
                                   a single ``/`` and remove all trailing
                                   ``/.``
    :return: the normalized filename; it has the same type as `filename`

    """
    if isinstance(filename, bytes):
        backslash, slash, dot = b"\\", b"/", b"."
    else:
        backslash, slash, dot = u"\\", u"/", u"."
    filename = filename.replace(backslash, slash)
    if strip_dot_slashes:
        leading = dot + slash
        inner = slash + dot + slash
        trailing = slash + dot
        while filename.startswith(leading):
            filename = filename[2:]
        # Replacing one by one also handles adjacent /./ cases
        while inner in filename:
            filename = filename.replace(inner, slash, 1)
        while filename.endswith(trailing):
            filename = filename[:-2]
    return filename


def fsencode(what):
    """A somewhat compatibility function for :func:`os.fsencode`.

    If `what` is of type :class:`bytes` no :func:`os.fsencode` is required.

    :return: `what` unchanged if it is a byte string already, otherwise
             the result of :func:`os.fsencode`

    """
    if isinstance(what, bytes):
        return what
    return os.fsencode(what)


def interpolate_bytes(formatstr, *values):
    """Interpolate byte strings also on Python 3.4.

    :param bytes formatstr:
    :param values: params for interpolation: may *not* contain Unicode strings
    :rvalue: the formatted octet
    :rtype: bytes
    :raises TypeError: in the fallback implementation if a value is
                       a native (text) string

    """
    assert isinstance(formatstr, bytes)
    # Python 3.5+ or Python2 know how to interpolate byte strings
    if PY35 or PY2:
        return formatstr % values
    # Only Python 3.0 - 3.4 gets here: workaround with a Latin-1 dance.
    # Latin-1 maps every byte to exactly one code point and vice versa.
    tformatstr = formatstr.decode("latin1")
    tvalues = []
    for v in values:
        # Do not automatically convert text (unicode) string values into bytes.
        if isinstance(v, str):
            raise TypeError(
                "unicode (native string) values not supported when"
                " interpolating into bytes")
        if isinstance(v, bytes):
            tvalues.append(v.decode("latin1"))
        else:
            tvalues.append(v)
    return (tformatstr % tuple(tvalues)).encode("latin1")
