Python/apps/py-cutils: cutils/shasum.py comparison

comparison cutils/shasum.py @ 122:1e5127028254

Move the real computation of digests from files and streams into a dedicated submodule, cutils.util.digest

author	Franz Glasner <fzglas.hg@dom66.de>
date	Wed, 01 Jan 2025 18:57:25 +0100
parents	a548783381b6
children	a813094ae4f5

comparison

equal deleted inserted replaced

-:2dc26a2f3d1c
+:1e5127028254
 # -*- coding: utf-8 -*-
 # :-
-# :Copyright: (c) 2020-2024 Franz Glasner
+# :Copyright: (c) 2020-2025 Franz Glasner
 # :License:   BSD-3-Clause
 # :-
 r"""Pure Python implementation of `shasum`.
 """
 import argparse
 import base64
 import binascii
 import errno
 import io
-try:
-import mmap
-except ImportError:
-mmap = None
 import os
 import re
-import stat
 import sys
 from . import (__version__, __revision__)
 from . import util
 from .util import constants
+from .util import digest
 def main(argv=None):
 aparser = argparse.ArgumentParser(
 description="Python implementation of shasum",
 dirnames.sort()
 dirfiles.sort()
 for fn in dirfiles:
 path = os.path.join(dirpath, fn)
 out(opts.dest or sys.stdout,
-compute_digest_file(opts.algorithm[0], path,
+digest.compute_digest_file(
-use_mmap=opts.mmap),
+opts.algorithm[0], path, use_mmap=opts.mmap),
 path,
 opts.algorithm[1],
 True,
 opts.base64)
 else:
 msvcrt.setmode(sys.stdin.fileno(), os.O_BINARY)
 source = sys.stdin
 else:
 source = sys.stdin.buffer
 out(sys.stdout,
-compute_digest_stream(opts.algorithm[0], source),
+digest.compute_digest_stream(opts.algorithm[0], source),
 None,
 opts.algorithm[1],
 True,
 opts.base64)
 else:
 for fn in opts.files:
 out(opts.dest or sys.stdout,
-compute_digest_file(opts.algorithm[0], fn,
+digest.compute_digest_file(
-use_mmap=opts.mmap),
+opts.algorithm[0], fn, use_mmap=opts.mmap),
 fn,
 opts.algorithm[1],
 True,
 opts.base64)
 return 0
 if pl is None:
 exit_code = 1
 print("-: MISSING", file=dest)
 else:
 tag, algo, cl_filename, cl_digest = pl
-computed_digest = compute_digest_stream(algo, source)
+computed_digest = digest.compute_digest_stream(algo, source)
 if compare_digests_equal(computed_digest, cl_digest, algo):
 res = "OK"
 else:
 res = "FAILED"
 exit_code = 1
 if pl is None:
 print("{}: MISSING".format(fn), file=dest)
 exit_code = 1
 else:
 tag, algo, cl_filename, cl_digest = pl
-computed_digest = compute_digest_file(algo, fn,
+computed_digest = digest.compute_digest_file(
-use_mmap=opts.mmap)
+algo, fn, use_mmap=opts.mmap)
 if compare_digests_equal(computed_digest, cl_digest, algo):
 res = "OK"
 else:
 exit_code = 1
 res = "FAILED"
 tag, algo, fn, digest = parts
 if tag in ("SIZE", "TIMESTAMP"):
 assert opts.allow_distinfo
 return (None, None, tag)
 try:
-d = compute_digest_file(algo, fn, use_mmap=opts.mmap)
+d = digest.compute_digest_file(algo, fn, use_mmap=opts.mmap)
 if compare_digests_equal(d, digest, algo):
 return ("ok", fn, tag)
 else:
 return ("failed", fn, tag)
 except EnvironmentError:
 '*' if binary else ' ',
 '-' if filename is None else util.normalize_filename(filename)),
 file=dest)
-def compute_digest_file(hashobj, path, use_mmap=None):
-"""
-:param hashobj: a :mod:`hashlib` compatible hash algorithm type or factory
-:param path: filename within the filesystem or a file descriptor opened in
-binary mode (also a socket or pipe)
-:param use_mmap: Use the :mod:`mmap` module if available.
-If `None` determine automatically.
-:type use_mmap: bool or None
-:return: the digest in binary form
-:rtype: bytes
-If a file descriptor is given it must support :func:`os.read`.
-"""
-h = hashobj()
-if isinstance(path, constants.PATH_TYPES):
-flags = os.O_RDONLY | getattr(os, "O_BINARY", 0) \
-| getattr(os, "O_SEQUENTIAL", 0) | getattr(os, "O_NOCTTY", 0)
-fd = os.open(path, flags)
-own_fd = True
-else:
-fd = path
-own_fd = False
-try:
-try:
-st = os.fstat(fd)
-except TypeError:
-#
-# "fd" is most probably a Python socket object.
-# (a pipe typically supports fstat)
-#
-use_mmap = False
-else:
-if stat.S_ISREG(st[stat.ST_MODE]):
-filesize = st[stat.ST_SIZE]
-if (use_mmap is None) \
-and (filesize > constants.MAX_AUTO_MAP_SIZE):
-#
-# This is borrowed from FreeBSD's cp(1) implementation:
-# Mmap and process if less than 8M (the limit is
-# so we don't totally trash memory on big files).
-# This is really a minor hack, but it wins some
-# CPU back.  Some filesystems, such as smbnetfs,
-# don't support mmap, so this is a best-effort
-# attempt.
-#
-use_mmap = False
-else:
-use_mmap = False
-if use_mmap is None:
-use_mmap = True
-if mmap is None or not use_mmap:
-# No mmap available or wanted -> use traditional low-level file IO
-fadvise = getattr(os, "posix_fadvise", None)
-if fadvise:
-fadvise(fd, 0, 0, os.POSIX_FADV_SEQUENTIAL)
-if not constants.PY2:
-fileobj = io.FileIO(fd, mode="r", closefd=False)
-buf = bytearray(constants.READ_CHUNK_SIZE)
-with memoryview(buf) as full_view:
-while True:
-try:
-n = fileobj.readinto(buf)
-except OSError as e:
-if e.errno not in (errno.EAGAIN,
-errno.EWOULDBLOCK,
-errno.EINTR):
-raise
-else:
-if n == 0:
-break
-if n == constants.READ_CHUNK_SIZE:
-h.update(buf)
-else:
-with full_view[:n] as partial_view:
-h.update(partial_view)
-else:
-while True:
-try:
-buf = os.read(fd, constants.READ_CHUNK_SIZE)
-except OSError as e:
-if e.errno not in (errno.EAGAIN,
-errno.EWOULDBLOCK,
-errno.EINTR):
-raise
-else:
-if len(buf) == 0:
-break
-h.update(buf)
-else:
-#
-# Use mmap
-#
-# NOTE: On Windows mmapped files with length 0 are not supported.
-#       So ensure to not call mmap.mmap() if the file size is 0.
-#
-madvise = getattr(mmap.mmap, "madvise", None)
-if filesize <= constants.MAP_WINDOW_SIZE:
-mapsize = filesize
-else:
-mapsize = constants.MAP_WINDOW_SIZE
-mapoffset = 0
-rest = filesize
-while rest > 0:
-m = mmap.mmap(fd,
-mapsize,
-access=mmap.ACCESS_READ,
-offset=mapoffset)
-if madvise:
-madvise(m, mmap.MADV_SEQUENTIAL)
-try:
-h.update(m)
-finally:
-m.close()
-rest -= mapsize
-mapoffset += mapsize
-if rest < mapsize:
-mapsize = rest
-finally:
-if own_fd:
-os.close(fd)
-return h.digest()
-def compute_digest_stream(hashobj, instream):
-"""
-:param hashobj: a :mod:`hashlib` compatible hash algorithm type or factory
-:param instream: a bytes input stream to read the data to be hashed from
-:return: the digest in binary form
-:rtype: bytes
-"""
-h = hashobj()
-while True:
-try:
-buf = instream.read(constants.READ_CHUNK_SIZE)
-except OSError as e:
-if e.errno not in (errno.EAGAIN, errno.EWOULDBLOCK, errno.EINTR):
-raise
-else:
-if buf is not None:
-if len(buf) == 0:
-break
-h.update(buf)
-return h.digest()
 if __name__ == "__main__":
 sys.exit(main())

Mercurial > hgrepos > Python > apps > py-cutils

comparison cutils/shasum.py @ 122:1e5127028254