Mercurial > hgrepos > Python > apps > py-cutils
comparison shasum.py @ 68:4c2da9c74d7c
"compute_digest_file()" now also accepts an already opened file descriptor.
It may therefore also refer to a pipe or socket. "mmap" is automatically
disabled in these cases.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Sat, 26 Feb 2022 17:24:06 +0100 |
| parents | 19893b4f42a5 |
| children | a23371a8780f |
comparison
equal
deleted
inserted
replaced
| 67:19893b4f42a5 | 68:4c2da9c74d7c |
|---|---|
| 31 try: | 31 try: |
| 32 import mmap | 32 import mmap |
| 33 except ImportError: | 33 except ImportError: |
| 34 mmap = None | 34 mmap = None |
| 35 import os | 35 import os |
| 36 try: | |
| 37 import pathlib | |
| 38 except ImportError: | |
| 39 pathlib = None | |
| 36 import re | 40 import re |
| 37 import stat | 41 import stat |
| 38 import sys | 42 import sys |
| 39 | 43 |
| 40 | 44 |
| 41 PY2 = sys.version_info[0] < 3 | 45 PY2 = sys.version_info[0] < 3 |
| 46 | |
| 47 if PY2: | |
| 48 PATH_TYPES = (unicode, str) # noqa: F821 (undefined name 'unicode') | |
| 49 else: | |
| 50 if pathlib: | |
| 51 PATH_TYPES = (str, bytes, pathlib.Path) | |
| 52 else: | |
| 53 PATH_TYPES = (str, bytes) | |
| 42 | 54 |
| 43 CHUNK_SIZE = 1024*1024 | 55 CHUNK_SIZE = 1024*1024 |
| 44 MAP_CHUNK_SIZE = 64*1024*1024 | 56 MAP_CHUNK_SIZE = 64*1024*1024 |
| 45 | 57 |
| 46 | 58 |
| 451 '*' if binary else ' ', | 463 '*' if binary else ' ', |
| 452 '-' if filename is None else normalize_filename(filename)), | 464 '-' if filename is None else normalize_filename(filename)), |
| 453 file=dest) | 465 file=dest) |
| 454 | 466 |
| 455 | 467 |
| 456 def compute_digest_file(hashobj, filename, use_mmap=True): | 468 def compute_digest_file(hashobj, path, use_mmap=True): |
| 457 """ | 469 """ |
| 458 :param hashobj: a :mod:`hashlib` compatible hash algorithm type or factory | 470 :param hashobj: a :mod:`hashlib` compatible hash algorithm type or factory |
| 459 :param str filename: filename within the filesystem | 471 :param path: filename within the filesystem or a file descriptor opened in |
| 472 binary mode (also a socket or pipe) | |
| 460 :param bool use_mmap: use the :mod:`mmap` module if available | 473 :param bool use_mmap: use the :mod:`mmap` module if available |
| 461 :return: the digest in binary form | 474 :return: the digest in binary form |
| 462 :rtype: bytes | 475 :rtype: bytes |
| 463 | 476 |
| 477 If a file descriptor is given it must support :func:`os.read`. | |
| 478 | |
| 464 """ | 479 """ |
| 465 h = hashobj() | 480 h = hashobj() |
| 466 flags = os.O_RDONLY | getattr(os, "O_BINARY", 0) \ | 481 if isinstance(path, PATH_TYPES): |
| 467 | getattr(os, "O_SEQUENTIAL", 0) | getattr(os, "O_NOCTTY", 0) | 482 flags = os.O_RDONLY | getattr(os, "O_BINARY", 0) \ |
| 468 fd = os.open(filename, flags) | 483 | getattr(os, "O_SEQUENTIAL", 0) | getattr(os, "O_NOCTTY", 0) |
| 484 fd = os.open(path, flags) | |
| 485 own_fd = True | |
| 486 else: | |
| 487 fd = path | |
| 488 own_fd = False | |
| 469 try: | 489 try: |
| 470 st = os.fstat(fd) | 490 try: |
| 471 filesize = st[stat.ST_SIZE] | 491 st = os.fstat(fd) |
| 492 except TypeError: | |
| 493 # | |
| 494 # "fd" is most probably a Python socket object. | |
| 495 # (a pipe typically supports fstat) | |
| 496 # | |
| 497 use_mmap = False | |
| 498 else: | |
| 499 if stat.S_ISREG(st[stat.ST_MODE]): | |
| 500 filesize = st[stat.ST_SIZE] | |
| 501 else: | |
| 502 use_mmap = False | |
| 472 if mmap is None or not use_mmap: | 503 if mmap is None or not use_mmap: |
| 473 # No mmap available -> use traditional low-level file IO | 504 # No mmap available -> use traditional low-level file IO |
| 474 while True: | 505 while True: |
| 475 try: | 506 try: |
| 476 buf = os.read(fd, CHUNK_SIZE) | 507 buf = os.read(fd, CHUNK_SIZE) |
| 509 rest -= mapsize | 540 rest -= mapsize |
| 510 mapoffset += mapsize | 541 mapoffset += mapsize |
| 511 if rest < mapsize: | 542 if rest < mapsize: |
| 512 mapsize = rest | 543 mapsize = rest |
| 513 finally: | 544 finally: |
| 514 os.close(fd) | 545 if own_fd: |
| 546 os.close(fd) | |
| 515 return h.digest() | 547 return h.digest() |
| 516 | 548 |
| 517 | 549 |
| 518 def compute_digest_stream(hashobj, instream): | 550 def compute_digest_stream(hashobj, instream): |
| 519 """ | 551 """ |
