# HG changeset patch # User Franz Glasner # Date 1645892646 -3600 # Node ID 4c2da9c74d7cd79b9246557a487c4ff728d0b2c8 # Parent 19893b4f42a5c72e306fa555be8c09d6bb1a6941 "compute_digest_file()" now also accepts an already opened file descriptor. It may therefore also refer to a pipe or socket. "mmap" is automatically disabled in these cases. diff -r 19893b4f42a5 -r 4c2da9c74d7c shasum.py --- a/shasum.py Sat Feb 26 14:08:24 2022 +0100 +++ b/shasum.py Sat Feb 26 17:24:06 2022 +0100 @@ -33,6 +33,10 @@ except ImportError: mmap = None import os +try: + import pathlib +except ImportError: + pathlib = None import re import stat import sys @@ -40,6 +44,14 @@ PY2 = sys.version_info[0] < 3 +if PY2: + PATH_TYPES = (unicode, str) # noqa: F821 (undefined name 'unicode') +else: + if pathlib: + PATH_TYPES = (str, bytes, pathlib.Path) + else: + PATH_TYPES = (str, bytes) + CHUNK_SIZE = 1024*1024 MAP_CHUNK_SIZE = 64*1024*1024 @@ -453,22 +465,41 @@ file=dest) -def compute_digest_file(hashobj, filename, use_mmap=True): +def compute_digest_file(hashobj, path, use_mmap=True): """ :param hashobj: a :mod:`hashlib` compatible hash algorithm type or factory - :param str filename: filename within the filesystem + :param path: filename within the filesystem or a file descriptor opened in + binary mode (also a socket or pipe) :param bool use_mmap: use the :mod:`mmap` module if available :return: the digest in binary form :rtype: bytes + If a file descriptor is given it must support :func:`os.read`. 
+ """ h = hashobj() - flags = os.O_RDONLY | getattr(os, "O_BINARY", 0) \ - | getattr(os, "O_SEQUENTIAL", 0) | getattr(os, "O_NOCTTY", 0) - fd = os.open(filename, flags) + if isinstance(path, PATH_TYPES): + flags = os.O_RDONLY | getattr(os, "O_BINARY", 0) \ + | getattr(os, "O_SEQUENTIAL", 0) | getattr(os, "O_NOCTTY", 0) + fd = os.open(path, flags) + own_fd = True + else: + fd = path + own_fd = False try: - st = os.fstat(fd) - filesize = st[stat.ST_SIZE] + try: + st = os.fstat(fd) + except TypeError: + # + # "fd" is most probably a Python socket object. + # (a pipe typically supports fstat) + # + use_mmap = False + else: + if stat.S_ISREG(st[stat.ST_MODE]): + filesize = st[stat.ST_SIZE] + else: + use_mmap = False if mmap is None or not use_mmap: # No mmmap available -> use traditional low-level file IO while True: @@ -511,7 +542,8 @@ if rest < mapsize: mapsize = rest finally: - os.close(fd) + if own_fd: + os.close(fd) return h.digest()