diff shasum.py @ 68:4c2da9c74d7c

"compute_digest_file()" now also accepts an already opened file descriptor. It therefore may refere also to a pipe or socket. "mmap" is automatically disabled in these cases.
author Franz Glasner <fzglas.hg@dom66.de>
date Sat, 26 Feb 2022 17:24:06 +0100
parents 19893b4f42a5
children a23371a8780f
line wrap: on
line diff
--- a/shasum.py	Sat Feb 26 14:08:24 2022 +0100
+++ b/shasum.py	Sat Feb 26 17:24:06 2022 +0100
@@ -33,6 +33,10 @@
 except ImportError:
     mmap = None
 import os
+try:
+    import pathlib
+except ImportError:
+    pathlib = None
 import re
 import stat
 import sys
@@ -40,6 +44,14 @@
 
 PY2 = sys.version_info[0] < 3
 
+if PY2:
+    PATH_TYPES = (unicode, str)    # noqa: F821 (undefined name 'unicode')
+else:
+    if pathlib:
+        PATH_TYPES = (str, bytes, pathlib.Path)
+    else:
+        PATH_TYPES = (str, bytes)
+
 CHUNK_SIZE = 1024*1024
 MAP_CHUNK_SIZE = 64*1024*1024
 
@@ -453,22 +465,41 @@
           file=dest)
 
 
-def compute_digest_file(hashobj, filename, use_mmap=True):
+def compute_digest_file(hashobj, path, use_mmap=True):
     """
     :param hashobj: a :mod:`hashlib` compatible hash algorithm type or factory
-    :param str filename: filename within the filesystem
+    :param path: filename within the filesystem or a file descriptor opened in
+                 binary mode (also a socket or pipe)
     :param bool use_mmap: use the :mod:`mmap` module if available
     :return: the digest in binary form
     :rtype: bytes
 
+    If a file descriptor is given it must support :func:`os.read`.
+
     """
     h = hashobj()
-    flags = os.O_RDONLY | getattr(os, "O_BINARY", 0) \
-        | getattr(os, "O_SEQUENTIAL", 0) | getattr(os, "O_NOCTTY", 0)
-    fd = os.open(filename, flags)
+    if isinstance(path, PATH_TYPES):
+        flags = os.O_RDONLY | getattr(os, "O_BINARY", 0) \
+            | getattr(os, "O_SEQUENTIAL", 0) | getattr(os, "O_NOCTTY", 0)
+        fd = os.open(path, flags)
+        own_fd = True
+    else:
+        fd = path
+        own_fd = False
     try:
-        st = os.fstat(fd)
-        filesize = st[stat.ST_SIZE]
+        try:
+            st = os.fstat(fd)
+        except TypeError:
+            #
+            # "fd" is most probably a Python socket object.
+            # (a pipe typically supports fstat)
+            #
+            use_mmap = False
+        else:
+            if stat.S_ISREG(st[stat.ST_MODE]):
+                filesize = st[stat.ST_SIZE]
+            else:
+                use_mmap = False
         if mmap is None or not use_mmap:
             # No mmmap available -> use traditional low-level file IO
             while True:
@@ -511,7 +542,8 @@
                 if rest < mapsize:
                     mapsize = rest
     finally:
-        os.close(fd)
+        if own_fd:
+            os.close(fd)
     return h.digest()