comparison shasum.py @ 68:4c2da9c74d7c

"compute_digest_file()" now also accepts an already opened file descriptor. It may therefore also refer to a pipe or socket. "mmap" is automatically disabled in these cases.
author Franz Glasner <fzglas.hg@dom66.de>
date Sat, 26 Feb 2022 17:24:06 +0100
parents 19893b4f42a5
children a23371a8780f
comparison
equal deleted inserted replaced
67:19893b4f42a5 68:4c2da9c74d7c
31 try: 31 try:
32 import mmap 32 import mmap
33 except ImportError: 33 except ImportError:
34 mmap = None 34 mmap = None
35 import os 35 import os
36 try:
37 import pathlib
38 except ImportError:
39 pathlib = None
36 import re 40 import re
37 import stat 41 import stat
38 import sys 42 import sys
39 43
40 44
41 PY2 = sys.version_info[0] < 3 45 PY2 = sys.version_info[0] < 3
46
47 if PY2:
48 PATH_TYPES = (unicode, str) # noqa: F821 (undefined name 'unicode')
49 else:
50 if pathlib:
51 PATH_TYPES = (str, bytes, pathlib.Path)
52 else:
53 PATH_TYPES = (str, bytes)
42 54
43 CHUNK_SIZE = 1024*1024 55 CHUNK_SIZE = 1024*1024
44 MAP_CHUNK_SIZE = 64*1024*1024 56 MAP_CHUNK_SIZE = 64*1024*1024
45 57
46 58
451 '*' if binary else ' ', 463 '*' if binary else ' ',
452 '-' if filename is None else normalize_filename(filename)), 464 '-' if filename is None else normalize_filename(filename)),
453 file=dest) 465 file=dest)
454 466
455 467
456 def compute_digest_file(hashobj, filename, use_mmap=True): 468 def compute_digest_file(hashobj, path, use_mmap=True):
457 """ 469 """
458 :param hashobj: a :mod:`hashlib` compatible hash algorithm type or factory 470 :param hashobj: a :mod:`hashlib` compatible hash algorithm type or factory
459 :param str filename: filename within the filesystem 471 :param path: filename within the filesystem or a file descriptor opened in
472 binary mode (also a socket or pipe)
460 :param bool use_mmap: use the :mod:`mmap` module if available 473 :param bool use_mmap: use the :mod:`mmap` module if available
461 :return: the digest in binary form 474 :return: the digest in binary form
462 :rtype: bytes 475 :rtype: bytes
463 476
477 If a file descriptor is given is must support :func:`os.read`.
478
464 """ 479 """
465 h = hashobj() 480 h = hashobj()
466 flags = os.O_RDONLY | getattr(os, "O_BINARY", 0) \ 481 if isinstance(path, PATH_TYPES):
467 | getattr(os, "O_SEQUENTIAL", 0) | getattr(os, "O_NOCTTY", 0) 482 flags = os.O_RDONLY | getattr(os, "O_BINARY", 0) \
468 fd = os.open(filename, flags) 483 | getattr(os, "O_SEQUENTIAL", 0) | getattr(os, "O_NOCTTY", 0)
484 fd = os.open(path, flags)
485 own_fd = True
486 else:
487 fd = path
488 own_fd = False
469 try: 489 try:
470 st = os.fstat(fd) 490 try:
471 filesize = st[stat.ST_SIZE] 491 st = os.fstat(fd)
492 except TypeError:
493 #
494 # "fd" is most probably a Python socket object.
495 # (a pipe typically supports fstat)
496 #
497 use_mmap = False
498 else:
499 if stat.S_ISREG(st[stat.ST_MODE]):
500 filesize = st[stat.ST_SIZE]
501 else:
502 use_mmap = False
472 if mmap is None or not use_mmap: 503 if mmap is None or not use_mmap:
473 # No mmmap available -> use traditional low-level file IO 504 # No mmmap available -> use traditional low-level file IO
474 while True: 505 while True:
475 try: 506 try:
476 buf = os.read(fd, CHUNK_SIZE) 507 buf = os.read(fd, CHUNK_SIZE)
509 rest -= mapsize 540 rest -= mapsize
510 mapoffset += mapsize 541 mapoffset += mapsize
511 if rest < mapsize: 542 if rest < mapsize:
512 mapsize = rest 543 mapsize = rest
513 finally: 544 finally:
514 os.close(fd) 545 if own_fd:
546 os.close(fd)
515 return h.digest() 547 return h.digest()
516 548
517 549
518 def compute_digest_stream(hashobj, instream): 550 def compute_digest_stream(hashobj, instream):
519 """ 551 """