diff shasum.py @ 2:5510a39a2d04

Basic hashing with proper binary stdin/stdout support for Py2, Py3 and Windows
author Franz Glasner <fzglas.hg@dom66.de>
date Fri, 04 Dec 2020 11:41:25 +0100
parents bbf4e0f5b651
children 5a6ed622846c
line wrap: on
line diff
--- a/shasum.py	Thu Dec 03 09:05:35 2020 +0100
+++ b/shasum.py	Fri Dec 04 11:41:25 2020 +0100
@@ -10,30 +10,65 @@
 
 """
 
+from __future__ import print_function
 
 import argparse
 import hashlib
 import sys
 
 
+PY2 = sys.version_info[0] < 3  # True when the major version is below 3
+
+CHUNK_SIZE = 1024 * 1024 * 1024  # 1 GiB; size passed to each read() call
+
+
 def main(argv=None):
+    if argv is None:
+        argv = sys.argv[1:]
 
     aparser = argparse.ArgumentParser(
         description="Python implementation of shasum",
         fromfile_prefix_chars='@')
+    aparser.add_argument(
+        "files", nargs="*", metavar="FILE")
 
     opts = aparser.parse_args(args=argv)
 
+    if not opts.files:
+        opts.files.append("-")  # no FILE given: hash standard input
+    for fn in opts.files:
+        if fn == "-":
+            if PY2:
+                if sys.platform == "win32":  # put stdin into binary mode
+                    import os, msvcrt  # msvcrt exists only on Windows
+                    msvcrt.setmode(sys.stdin.fileno(), os.O_BINARY)
+                source = sys.stdin  # Py2: stdin already yields bytes (str)
+            else:
+                source = sys.stdin.buffer  # Py3: underlying bytes stream
+            print(compute_digest(hashlib.sha256, source))
+        else:
+            with open(fn, "rb") as source:
+                print(compute_digest(hashlib.sha256, source))
+
 
 def compute_digest(hashobj, instream):
     """
 
-    :param hashobj: a :mod:`hashlib` compatible hash algorithm instance
+    :param hashobj: a :mod:`hashlib` compatible hash algorithm type or factory
     :param instream: a bytes input stream to read the data to be hashed from
     :return: the digest in hex form
     :rtype: str
 
     """
-    
+    h = hashobj()  # fresh hash object; `hashobj` is called with no arguments
+    while True:
+        buf = instream.read(CHUNK_SIZE)  # read at most CHUNK_SIZE bytes
+        if buf is not None:  # a None result is skipped and the read retried
+            if len(buf) == 0:  # empty read: end of input
+                break
+            h.update(buf)
+    return h.hexdigest()
+
+
 if __name__ == "__main__":
     sys.exit(main())