view shasum.py @ 9:81f28bf89c26

Some more output selection options and documentation
author Franz Glasner <fzglas.hg@dom66.de>
date Fri, 04 Dec 2020 15:50:28 +0100
parents 048b97213a23
children 77446cd3ea6f
line wrap: on
line source

r"""
:Author:    Franz Glasner
:Copyright: (c) 2020 Franz Glasner.
            All rights reserved.
:License:   BSD 3-Clause "New" or "Revised" License.
            See :ref:`LICENSE <license>` for details.
            If you cannot find LICENSE see
            <https://opensource.org/licenses/BSD-3-Clause>
:ID:        @(#) HGid$

"""

from __future__ import print_function

import argparse
import hashlib
import sys


# True when the running interpreter has a major version below 3
PY2 = sys.version_info < (3,)

# Number of bytes requested per read() call while hashing (1 GiB)
CHUNK_SIZE = 1 << 30


def main(argv=None):
    """Command line entry point: hash the given files (or stdin) and
    print one digest line per input to stdout.

    :param argv: the command line arguments to parse; if `None`
                 :mod:`argparse` uses its default argument source
    :type argv: list or None
    :return: `None`

    Terminates the process with exit code 78 when text mode is requested
    because only binary mode is implemented.

    """
    aparser = argparse.ArgumentParser(
        description="Python implementation of shasum",
        fromfile_prefix_chars='@')
    aparser.add_argument(
        "--algorithm", "-a", action="store", type=argv2algo,
        help="1 (default), 224, 256, 384, 512, 3-224, 3-256, 3-384, 3-512, blake2b, blake2s, md5")
    aparser.add_argument(
        "--binary", "-b", action="store_false", dest="text_mode", default=False,
        help="read in binary mode (default)")
    aparser.add_argument(
        "--bsd", "-B", action="store_true", dest="bsd", default=False,
        help="write BSD style output; also :command:`openssl dgst` style")
    aparser.add_argument(
        "--reverse", "-r", action="store_false", dest="bsd", default=False,
        help="explicitely select normal coreutils style output (to be option compatible with BSD style commands and :command:`openssl dgst -r`)")
    aparser.add_argument(
        "--tag", action="store_true", dest="bsd", default=False,
        help="alias for the `--bsd' option (to be compatible with :command:`b2sum`)")
    aparser.add_argument(
        "--text", "-t", action="store_true", dest="text_mode", default=False,
        help="read in text mode (not supported)")
    aparser.add_argument(
        "files", nargs="*", metavar="FILE")

    opts = aparser.parse_args(args=argv)

    if opts.text_mode:
        print("ERROR: text mode not supported", file=sys.stderr)
        sys.exit(78)   # :manpage:`sysexits(3)` EX_CONFIG

    # No explicit algorithm given: fall back to SHA1
    if not opts.algorithm:
        opts.algorithm = argv2algo("1")

    # Select the output formatter once; both share the same signature
    if opts.bsd:
        out = out_bsd
    else:
        out = out_std

    # No FILE arguments means: read from stdin
    if not opts.files:
        opts.files.append('-')
    if len(opts.files) == 1 and opts.files[0] == '-':
        if PY2:
            if sys.platform == "win32":
                # Switch stdin to binary mode so that the bytes are hashed
                # unmodified.  Both modules must be imported separately:
                # a dotted ``import os.msvcrt`` fails with ImportError.
                import msvcrt
                import os
                msvcrt.setmode(sys.stdin.fileno(), os.O_BINARY)
            source = sys.stdin
        else:
            # On Python 3 the bytes interface of stdin is its buffer
            source = sys.stdin.buffer
        out(sys.stdout,
            compute_digest(opts.algorithm[0], source),
            None,
            opts.algorithm[1],
            True)
    else:
        for fn in opts.files:
            with open(fn, "rb") as source:
                out(sys.stdout,
                    compute_digest(opts.algorithm[0], source),
                    fn,
                    opts.algorithm[1],
                    True)


def argv2algo(s):
    """Translate a command line algorithm specifier into the digest
    factory and the algorithm tag used for output.

    :param str s: the specifier as given on the command line
    :return: the internal digest specification
    :rtype: a tuple (digest_type_or_factory, name_in_output)
    :raises argparse.ArgumentTypeError: if the specifier is not recognized

    The specifier is matched case-insensitively.

    """
    spec = s.lower()
    # (accepted aliases, name of the hashlib constructor, output tag)
    known = (
        (("1", "sha1"), "sha1", "SHA1"),
        (("224", "sha224"), "sha224", "SHA224"),
        (("256", "sha256"), "sha256", "SHA256"),
        (("384", "sha384"), "sha384", "SHA384"),
        (("512", "sha512"), "sha512", "SHA512"),
        (("3-224", "sha3-224"), "sha3_224", "SHA3-224"),
        (("3-256", "sha3-256"), "sha3_256", "SHA3-256"),
        (("3-384", "sha3-384"), "sha3_384", "SHA3-384"),
        (("3-512", "sha3-512"), "sha3_512", "SHA3-512"),
        (("blake2b", "blake2b-512"), "blake2b", "BLAKE2b"),
        (("blake2s", "blake2s-256"), "blake2s", "BLAKE2s"),
        (("md5",), "md5", "MD5"),
    )
    for aliases, factory_name, tag in known:
        if spec in aliases:
            # Look the constructor up only for the selected algorithm so
            # that constructors missing from :mod:`hashlib` do not affect
            # the other algorithms.
            return (getattr(hashlib, factory_name), tag)
    raise argparse.ArgumentTypeError(
        "`{}' is not a recognized algorithm".format(spec))


def out_bsd(dest, digest, filename, digestname, binary):
    """Write one result line in BSD format, which is also the format of
    :command:`openssl dgst` and :command:`b2sum --tag`.

    :param dest: the text stream to print to
    :param digest: the digest in hex form
    :param filename: the name of the hashed file or `None`; with `None`
                     only the bare digest is printed
    :param digestname: the algorithm tag that prefixes the line
    :param binary: not used by this format

    """
    if filename is not None:
        line = "{} ({}) = {}".format(digestname, filename, digest)
    else:
        line = digest
    print(line, file=dest)


def out_std(dest, digest, filename, digestname, binary):
    """Write one result line in coreutils format (:command:`shasum` et al.)

    :param dest: the text stream to print to
    :param digest: the digest in hex form
    :param filename: the name of the hashed file or `None`; `None` is
                     printed as ``-``
    :param digestname: not used by this format
    :param binary: if true the file name is marked with ``*``, otherwise
                   with a blank

    """
    mode_marker = '*' if binary else ' '
    shown_name = filename if filename is not None else '-'
    line = "{} {}{}".format(digest, mode_marker, shown_name)
    print(line, file=dest)


def compute_digest(hashobj, instream, chunk_size=None):
    """Hash all data read from `instream` and return the hex digest.

    :param hashobj: a :mod:`hashlib` compatible hash algorithm type or factory
    :param instream: a bytes input stream to read the data to be hashed from
    :param chunk_size: the number of bytes to request per read call;
                       if `None` the module default `CHUNK_SIZE` is used
    :type chunk_size: int or None
    :return: the digest in hex form
    :rtype: str

    The stream is read until a read call returns an empty buffer.

    """
    if chunk_size is None:
        chunk_size = CHUNK_SIZE
    h = hashobj()
    while True:
        buf = instream.read(chunk_size)
        if buf is None:
            # Nothing was returned but the stream did not signal its end
            # either (presumably a non-blocking stream without pending
            # data): just try again.
            continue
        if not buf:
            # An empty buffer marks the end of the stream
            break
        h.update(buf)
    return h.hexdigest()


# Run the command line interface when executed as a script; the return
# value of main() is handed to sys.exit().
if __name__ == "__main__":
    sys.exit(main())