Mercurial > hgrepos > Python > apps > py-cutils
changeset 89:72684020f2f3
By default use mmap only for files up to 8MiB in size.
This follows the FreeBSD cp(1) implementation.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Thu, 21 Apr 2022 01:20:35 +0200 |
| parents | f69353f26937 |
| children | 42419f57eda9 |
| files | cutils/shasum.py |
| diffstat | 1 files changed, 25 insertions(+), 8 deletions(-) [+] |
line wrap: on
line diff
--- a/cutils/shasum.py Thu Apr 21 00:24:49 2022 +0200 +++ b/cutils/shasum.py Thu Apr 21 01:20:35 2022 +0200 @@ -45,8 +45,9 @@ else: PATH_TYPES = (str, bytes) -CHUNK_SIZE = 1024*1024 -MAP_CHUNK_SIZE = 64*1024*1024 +READ_CHUNK_SIZE = 2 * 1024 * 1024 # like BUFSIZE_MAX on FreeBSD +MAX_AUTO_MAP_SIZE = 8 * 1024 * 1024 +MAP_WINDOW_SIZE = MAX_AUTO_MAP_SIZE # do not totally trash memory on big files def main(argv=None): @@ -533,12 +534,14 @@ file=dest) -def compute_digest_file(hashobj, path, use_mmap=True): +def compute_digest_file(hashobj, path, use_mmap=None): """ :param hashobj: a :mod:`hashlib` compatible hash algorithm type or factory :param path: filename within the filesystem or a file descriptor opened in binary mode (also a socket or pipe) - :param bool use_mmap: use the :mod:`mmap` module if available + :param use_mmap: Use the :mod:`mmap` module if available. + If `None` determine automatically. + :type use_mmap: bool or None :return: the digest in binary form :rtype: bytes @@ -566,13 +569,27 @@ else: if stat.S_ISREG(st[stat.ST_MODE]): filesize = st[stat.ST_SIZE] + if (use_mmap is None) \ + and (filesize > MAX_AUTO_MAP_SIZE): + # + # This is borrowed from FreeBSD's cp(1) implementation: + # Mmap and process if less than 8M (the limit is + # so we don't totally trash memory on big files. + # This is really a minor hack, but it wins some + # CPU back. Some filesystems, such as smbnetfs, + # don't support mmap, so this is a best-effort + # attempt. + # + use_mmap = False else: use_mmap = False + if use_mmap is None: + use_mmap = True if mmap is None or not use_mmap: # No mmap available or wanted -> use traditional low-level file IO while True: try: - buf = os.read(fd, CHUNK_SIZE) + buf = os.read(fd, READ_CHUNK_SIZE) except OSError as e: if e.errno not in (errno.EAGAIN, errno.EWOULDBLOCK, errno.EINTR): @@ -589,10 +606,10 @@ # So ensure to not call mmap.mmap() if the file size is 0. # madvise = getattr(mmap.mmap, "madvise", None) - if filesize < MAP_CHUNK_SIZE: + if filesize <= MAP_WINDOW_SIZE: mapsize = filesize else: - mapsize = MAP_CHUNK_SIZE + mapsize = MAP_WINDOW_SIZE mapoffset = 0 rest = filesize while rest > 0: @@ -628,7 +645,7 @@ h = hashobj() while True: try: - buf = instream.read(CHUNK_SIZE) + buf = instream.read(READ_CHUNK_SIZE) except OSError as e: if e.errno not in (errno.EAGAIN, errno.EWOULDBLOCK, errno.EINTR): raise
