Mercurial > hgrepos > Python > apps > py-cutils
comparison cutils/shasum.py @ 89:72684020f2f3
By default use mmap only for files up to 8MiB in size.
This follows the FreeBSD cp(1) implementation.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Thu, 21 Apr 2022 01:20:35 +0200 |
| parents | f69353f26937 |
| children | 42419f57eda9 |
comparison
equal
deleted
inserted
replaced
| 88:f69353f26937 | 89:72684020f2f3 |
|---|---|
| 43 if pathlib: | 43 if pathlib: |
| 44 PATH_TYPES = (str, bytes, pathlib.Path) | 44 PATH_TYPES = (str, bytes, pathlib.Path) |
| 45 else: | 45 else: |
| 46 PATH_TYPES = (str, bytes) | 46 PATH_TYPES = (str, bytes) |
| 47 | 47 |
| 48 CHUNK_SIZE = 1024*1024 | 48 READ_CHUNK_SIZE = 2 * 1024 * 1024 # like BUFSIZE_MAX on FreeBSD |
| 49 MAP_CHUNK_SIZE = 64*1024*1024 | 49 MAX_AUTO_MAP_SIZE = 8 * 1024 * 1024 |
| | 50 MAP_WINDOW_SIZE = MAX_AUTO_MAP_SIZE # do not totally trash memory on big files |
| 50 | 51 |
| 51 | 52 |
| 52 def main(argv=None): | 53 def main(argv=None): |
| 53 aparser = argparse.ArgumentParser( | 54 aparser = argparse.ArgumentParser( |
| 54 description="Python implementation of shasum", | 55 description="Python implementation of shasum", |
| 531 '*' if binary else ' ', | 532 '*' if binary else ' ', |
| 532 '-' if filename is None else normalize_filename(filename)), | 533 '-' if filename is None else normalize_filename(filename)), |
| 533 file=dest) | 534 file=dest) |
| 534 | 535 |
| 535 | 536 |
| 536 def compute_digest_file(hashobj, path, use_mmap=True): | 537 def compute_digest_file(hashobj, path, use_mmap=None): |
| 537 """ | 538 """ |
| 538 :param hashobj: a :mod:`hashlib` compatible hash algorithm type or factory | 539 :param hashobj: a :mod:`hashlib` compatible hash algorithm type or factory |
| 539 :param path: filename within the filesystem or a file descriptor opened in | 540 :param path: filename within the filesystem or a file descriptor opened in |
| 540 binary mode (also a socket or pipe) | 541 binary mode (also a socket or pipe) |
| 541 :param bool use_mmap: use the :mod:`mmap` module if available | 542 :param use_mmap: Use the :mod:`mmap` module if available. |
| | 543 If `None` determine automatically. |
| | 544 :type use_mmap: bool or None |
| 542 :return: the digest in binary form | 545 :return: the digest in binary form |
| 543 :rtype: bytes | 546 :rtype: bytes |
| 544 | 547 |
| 545 If a file descriptor is given is must support :func:`os.read`. | 548 If a file descriptor is given is must support :func:`os.read`. |
| 546 | 549 |
| 564 # | 567 # |
| 565 use_mmap = False | 568 use_mmap = False |
| 566 else: | 569 else: |
| 567 if stat.S_ISREG(st[stat.ST_MODE]): | 570 if stat.S_ISREG(st[stat.ST_MODE]): |
| 568 filesize = st[stat.ST_SIZE] | 571 filesize = st[stat.ST_SIZE] |
| | 572 if (use_mmap is None) \ |
| | 573 and (filesize > MAX_AUTO_MAP_SIZE): |
| | 574 # |
| | 575 # This is borrowed from FreeBSD's cp(1) implementation: |
| | 576 # Mmap and process if less than 8M (the limit is |
| | 577 # so we don't totally trash memory on big files. |
| | 578 # This is really a minor hack, but it wins some |
| | 579 # CPU back. Some filesystems, such as smbnetfs, |
| | 580 # don't support mmap, so this is a best-effort |
| | 581 # attempt. |
| | 582 # |
| | 583 use_mmap = False |
| 569 else: | 584 else: |
| 570 use_mmap = False | 585 use_mmap = False |
| | 586 if use_mmap is None: |
| | 587 use_mmap = True |
| 571 if mmap is None or not use_mmap: | 588 if mmap is None or not use_mmap: |
| 572 # No mmap available or wanted -> use traditional low-level file IO | 589 # No mmap available or wanted -> use traditional low-level file IO |
| 573 while True: | 590 while True: |
| 574 try: | 591 try: |
| 575 buf = os.read(fd, CHUNK_SIZE) | 592 buf = os.read(fd, READ_CHUNK_SIZE) |
| 576 except OSError as e: | 593 except OSError as e: |
| 577 if e.errno not in (errno.EAGAIN, errno.EWOULDBLOCK, | 594 if e.errno not in (errno.EAGAIN, errno.EWOULDBLOCK, |
| 578 errno.EINTR): | 595 errno.EINTR): |
| 579 raise | 596 raise |
| 580 else: | 597 else: |
| 587 # | 604 # |
| 588 # NOTE: On Windows mmapped files with length 0 are not supported. | 605 # NOTE: On Windows mmapped files with length 0 are not supported. |
| 589 # So ensure to not call mmap.mmap() if the file size is 0. | 606 # So ensure to not call mmap.mmap() if the file size is 0. |
| 590 # | 607 # |
| 591 madvise = getattr(mmap.mmap, "madvise", None) | 608 madvise = getattr(mmap.mmap, "madvise", None) |
| 592 if filesize < MAP_CHUNK_SIZE: | 609 if filesize <= MAP_WINDOW_SIZE: |
| 593 mapsize = filesize | 610 mapsize = filesize |
| 594 else: | 611 else: |
| 595 mapsize = MAP_CHUNK_SIZE | 612 mapsize = MAP_WINDOW_SIZE |
| 596 mapoffset = 0 | 613 mapoffset = 0 |
| 597 rest = filesize | 614 rest = filesize |
| 598 while rest > 0: | 615 while rest > 0: |
| 599 m = mmap.mmap(fd, | 616 m = mmap.mmap(fd, |
| 600 mapsize, | 617 mapsize, |
| 626 | 643 |
| 627 """ | 644 """ |
| 628 h = hashobj() | 645 h = hashobj() |
| 629 while True: | 646 while True: |
| 630 try: | 647 try: |
| 631 buf = instream.read(CHUNK_SIZE) | 648 buf = instream.read(READ_CHUNK_SIZE) |
| 632 except OSError as e: | 649 except OSError as e: |
| 633 if e.errno not in (errno.EAGAIN, errno.EWOULDBLOCK, errno.EINTR): | 650 if e.errno not in (errno.EAGAIN, errno.EWOULDBLOCK, errno.EINTR): |
| 634 raise | 651 raise |
| 635 else: | 652 else: |
| 636 if buf is not None: | 653 if buf is not None: |
