Python/apps/py-cutils: cutils/util/digest.py comparison

comparison cutils/util/digest.py @ 122:1e5127028254

Move the real computation of digests from files and streams into dedicated submodule cutils.util.digest

author	Franz Glasner <fzglas.hg@dom66.de>
date	Wed, 01 Jan 2025 18:57:25 +0100
parents
children	a813094ae4f5

comparison

equal deleted inserted replaced

-:2dc26a2f3d1c
+:1e5127028254
+# -*- coding: utf-8 -*-
+# :-
+# :Copyright: (c) 2020-2025 Franz Glasner
+# :License:   BSD-3-Clause
+# :-
+r"""Utility sub-module to implement a file and stream digest computations.
+"""
+__all__ = ["compute_digest_file", "compute_digest_stream"]
+import errno
+import io
+import os
+try:
+import mmap
+except ImportError:
+mmap = None
+import stat
+from . import constants
def compute_digest_file(hashobj, path, use_mmap=None):
    """Compute the digest for a file given by its filename or an open fd.

    :param hashobj: a :mod:`hashlib` compatible hash algorithm type or factory
    :param path: filename within the filesystem or a file descriptor opened in
                 binary mode (also a socket or pipe)
    :param use_mmap: Use the :mod:`mmap` module if available.
                     If `None` determine automatically.
    :type use_mmap: bool or None
    :return: the digest in binary form
    :rtype: bytes
    :raises OSError: if opening, reading or mapping the file fails with an
                     error other than `EAGAIN`, `EWOULDBLOCK` or `EINTR`
                     (these are retried while reading)

    If a file descriptor is given it must support :func:`os.read`.

    A descriptor handed in by the caller is never closed here; only a
    descriptor opened by this function from a filename is closed again.

    Memory mapping is only used for regular files. Everything else
    is read sequentially in chunks of `constants.READ_CHUNK_SIZE` bytes.

    """
    retry_errnos = (errno.EAGAIN, errno.EWOULDBLOCK, errno.EINTR)
    h = hashobj()
    if isinstance(path, constants.PATH_TYPES):
        # The optional flags do not exist on every platform -> default to 0
        flags = os.O_RDONLY | getattr(os, "O_BINARY", 0) \
            | getattr(os, "O_SEQUENTIAL", 0) | getattr(os, "O_NOCTTY", 0)
        fd = os.open(path, flags)
        own_fd = True
    else:
        fd = path
        own_fd = False
    try:
        try:
            st = os.fstat(fd)
        except TypeError:
            #
            # "fd" is most probably a Python socket object.
            # (a pipe typically supports fstat)
            #
            use_mmap = False
        else:
            if stat.S_ISREG(st[stat.ST_MODE]):
                filesize = st[stat.ST_SIZE]
                if (use_mmap is None) \
                        and (filesize > constants.MAX_AUTO_MAP_SIZE):
                    #
                    # This is borrowed from FreeBSD's cp(1) implementation:
                    # Mmap and process if less than 8M (the limit is
                    # so we don't totally trash memory on big files).
                    # This is really a minor hack, but it wins some
                    # CPU back.  Some filesystems, such as smbnetfs,
                    # don't support mmap, so this is a best-effort
                    # attempt.
                    #
                    use_mmap = False
            else:
                # Only regular files are mapped
                use_mmap = False
        if use_mmap is None:
            use_mmap = True
        if mmap is None or not use_mmap:
            # No mmap available or wanted -> use traditional low-level file IO
            fadvise = getattr(os, "posix_fadvise", None)
            if fadvise:
                try:
                    fadvise(fd, 0, 0, os.POSIX_FADV_SEQUENTIAL)
                except OSError:
                    #
                    # The advice is just an optimization hint. It is
                    # rejected for pipes and FIFOs (ESPIPE), which can be
                    # read sequentially anyway, so ignore the failure.
                    #
                    pass
            if not constants.PY2:
                fileobj = io.FileIO(fd, mode="r", closefd=False)
                buf = bytearray(constants.READ_CHUNK_SIZE)
                with memoryview(buf) as full_view:
                    while True:
                        try:
                            n = fileobj.readinto(buf)
                        except OSError as e:
                            if e.errno not in retry_errnos:
                                raise
                        else:
                            if n is None:
                                #
                                # Non-blocking descriptor without data
                                # available: nothing has been read into
                                # `buf`. Retry instead of hashing stale
                                # buffer content (`full_view[:None]` would
                                # be the complete buffer).
                                #
                                continue
                            if n == 0:
                                # EOF
                                break
                            if n == constants.READ_CHUNK_SIZE:
                                h.update(buf)
                            else:
                                # Short read: hash the filled part only
                                with full_view[:n] as partial_view:
                                    h.update(partial_view)
            else:
                while True:
                    try:
                        buf = os.read(fd, constants.READ_CHUNK_SIZE)
                    except OSError as e:
                        if e.errno not in retry_errnos:
                            raise
                    else:
                        if len(buf) == 0:
                            # EOF
                            break
                        h.update(buf)
        else:
            #
            # Use mmap
            #
            # NOTE: On Windows mmapped files with length 0 are not supported.
            #       So ensure to not call mmap.mmap() if the file size is 0.
            #       (The `while rest > 0` loop below is not entered then.)
            #
            # `filesize` is always bound here: `use_mmap` can only be true
            # if the fstat() call succeeded and reported a regular file.
            #
            madvise = getattr(mmap.mmap, "madvise", None)
            if filesize <= constants.MAP_WINDOW_SIZE:
                mapsize = filesize
            else:
                mapsize = constants.MAP_WINDOW_SIZE
            mapoffset = 0
            rest = filesize
            # Walk over the file in windows of at most MAP_WINDOW_SIZE bytes
            while rest > 0:
                m = mmap.mmap(fd,
                              mapsize,
                              access=mmap.ACCESS_READ,
                              offset=mapoffset)
                if madvise:
                    madvise(m, mmap.MADV_SEQUENTIAL)
                try:
                    h.update(m)
                finally:
                    m.close()
                rest -= mapsize
                mapoffset += mapsize
                if rest < mapsize:
                    # The last window covers just the remaining bytes
                    mapsize = rest
    finally:
        if own_fd:
            os.close(fd)
    return h.digest()
def compute_digest_stream(hashobj, instream):
    """Compute the digest of all data that can be read from `instream`.

    The stream is consumed in chunks of `constants.READ_CHUNK_SIZE` bytes
    until a read yields an empty result (end of stream). Reads that fail
    with `EAGAIN`, `EWOULDBLOCK` or `EINTR` and reads that yield `None`
    are simply retried.

    :param hashobj: a :mod:`hashlib` compatible hash algorithm type or factory
    :param instream: a bytes input stream to read the data to be hashed from
    :return: the digest in binary form
    :rtype: bytes
    :raises OSError: if reading fails with any other error

    """
    transient_errnos = (errno.EAGAIN, errno.EWOULDBLOCK, errno.EINTR)
    digester = hashobj()
    while True:
        try:
            chunk = instream.read(constants.READ_CHUNK_SIZE)
        except OSError as exc:
            if exc.errno in transient_errnos:
                # Temporary condition -> just try again
                continue
            raise
        if chunk is None:
            # No data delivered by this read -> try again
            continue
        if len(chunk) == 0:
            # End of stream
            break
        digester.update(chunk)
    return digester.digest()

Mercurial > hgrepos > Python > apps > py-cutils

comparison cutils/util/digest.py @ 122:1e5127028254