Mercurial > hgrepos > Python > apps > py-cutils
comparison cutils/shasum.py @ 122:1e5127028254
Move the real computation of digests from files and streams into dedicated submodule cutils.util.digest
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Wed, 01 Jan 2025 18:57:25 +0100 |
| parents | a548783381b6 |
| children | a813094ae4f5 |
comparison
equal
deleted
inserted
replaced
| 121:2dc26a2f3d1c | 122:1e5127028254 |
|---|---|
| 1 # -*- coding: utf-8 -*- | 1 # -*- coding: utf-8 -*- |
| 2 # :- | 2 # :- |
| 3 # :Copyright: (c) 2020-2024 Franz Glasner | 3 # :Copyright: (c) 2020-2025 Franz Glasner |
| 4 # :License: BSD-3-Clause | 4 # :License: BSD-3-Clause |
| 5 # :- | 5 # :- |
| 6 r"""Pure Python implementation of `shasum`. | 6 r"""Pure Python implementation of `shasum`. |
| 7 | 7 |
| 8 """ | 8 """ |
| 16 import argparse | 16 import argparse |
| 17 import base64 | 17 import base64 |
| 18 import binascii | 18 import binascii |
| 19 import errno | 19 import errno |
| 20 import io | 20 import io |
| 21 try: | |
| 22 import mmap | |
| 23 except ImportError: | |
| 24 mmap = None | |
| 25 import os | 21 import os |
| 26 import re | 22 import re |
| 27 import stat | |
| 28 import sys | 23 import sys |
| 29 | 24 |
| 30 from . import (__version__, __revision__) | 25 from . import (__version__, __revision__) |
| 31 from . import util | 26 from . import util |
| 32 from .util import constants | 27 from .util import constants |
| 28 from .util import digest | |
| 33 | 29 |
| 34 | 30 |
| 35 def main(argv=None): | 31 def main(argv=None): |
| 36 aparser = argparse.ArgumentParser( | 32 aparser = argparse.ArgumentParser( |
| 37 description="Python implementation of shasum", | 33 description="Python implementation of shasum", |
| 168 dirnames.sort() | 164 dirnames.sort() |
| 169 dirfiles.sort() | 165 dirfiles.sort() |
| 170 for fn in dirfiles: | 166 for fn in dirfiles: |
| 171 path = os.path.join(dirpath, fn) | 167 path = os.path.join(dirpath, fn) |
| 172 out(opts.dest or sys.stdout, | 168 out(opts.dest or sys.stdout, |
| 173 compute_digest_file(opts.algorithm[0], path, | 169 digest.compute_digest_file( |
| 174 use_mmap=opts.mmap), | 170 opts.algorithm[0], path, use_mmap=opts.mmap), |
| 175 path, | 171 path, |
| 176 opts.algorithm[1], | 172 opts.algorithm[1], |
| 177 True, | 173 True, |
| 178 opts.base64) | 174 opts.base64) |
| 179 else: | 175 else: |
| 184 msvcrt.setmode(sys.stdin.fileno(), os.O_BINARY) | 180 msvcrt.setmode(sys.stdin.fileno(), os.O_BINARY) |
| 185 source = sys.stdin | 181 source = sys.stdin |
| 186 else: | 182 else: |
| 187 source = sys.stdin.buffer | 183 source = sys.stdin.buffer |
| 188 out(sys.stdout, | 184 out(sys.stdout, |
| 189 compute_digest_stream(opts.algorithm[0], source), | 185 digest.compute_digest_stream(opts.algorithm[0], source), |
| 190 None, | 186 None, |
| 191 opts.algorithm[1], | 187 opts.algorithm[1], |
| 192 True, | 188 True, |
| 193 opts.base64) | 189 opts.base64) |
| 194 else: | 190 else: |
| 195 for fn in opts.files: | 191 for fn in opts.files: |
| 196 out(opts.dest or sys.stdout, | 192 out(opts.dest or sys.stdout, |
| 197 compute_digest_file(opts.algorithm[0], fn, | 193 digest.compute_digest_file( |
| 198 use_mmap=opts.mmap), | 194 opts.algorithm[0], fn, use_mmap=opts.mmap), |
| 199 fn, | 195 fn, |
| 200 opts.algorithm[1], | 196 opts.algorithm[1], |
| 201 True, | 197 True, |
| 202 opts.base64) | 198 opts.base64) |
| 203 return 0 | 199 return 0 |
| 258 if pl is None: | 254 if pl is None: |
| 259 exit_code = 1 | 255 exit_code = 1 |
| 260 print("-: MISSING", file=dest) | 256 print("-: MISSING", file=dest) |
| 261 else: | 257 else: |
| 262 tag, algo, cl_filename, cl_digest = pl | 258 tag, algo, cl_filename, cl_digest = pl |
| 263 computed_digest = compute_digest_stream(algo, source) | 259 computed_digest = digest.compute_digest_stream(algo, source) |
| 264 if compare_digests_equal(computed_digest, cl_digest, algo): | 260 if compare_digests_equal(computed_digest, cl_digest, algo): |
| 265 res = "OK" | 261 res = "OK" |
| 266 else: | 262 else: |
| 267 res = "FAILED" | 263 res = "FAILED" |
| 268 exit_code = 1 | 264 exit_code = 1 |
| 273 if pl is None: | 269 if pl is None: |
| 274 print("{}: MISSING".format(fn), file=dest) | 270 print("{}: MISSING".format(fn), file=dest) |
| 275 exit_code = 1 | 271 exit_code = 1 |
| 276 else: | 272 else: |
| 277 tag, algo, cl_filename, cl_digest = pl | 273 tag, algo, cl_filename, cl_digest = pl |
| 278 computed_digest = compute_digest_file(algo, fn, | 274 computed_digest = digest.compute_digest_file( |
| 279 use_mmap=opts.mmap) | 275 algo, fn, use_mmap=opts.mmap) |
| 280 if compare_digests_equal(computed_digest, cl_digest, algo): | 276 if compare_digests_equal(computed_digest, cl_digest, algo): |
| 281 res = "OK" | 277 res = "OK" |
| 282 else: | 278 else: |
| 283 exit_code = 1 | 279 exit_code = 1 |
| 284 res = "FAILED" | 280 res = "FAILED" |
| 330 tag, algo, fn, digest = parts | 326 tag, algo, fn, digest = parts |
| 331 if tag in ("SIZE", "TIMESTAMP"): | 327 if tag in ("SIZE", "TIMESTAMP"): |
| 332 assert opts.allow_distinfo | 328 assert opts.allow_distinfo |
| 333 return (None, None, tag) | 329 return (None, None, tag) |
| 334 try: | 330 try: |
| 335 d = compute_digest_file(algo, fn, use_mmap=opts.mmap) | 331 d = digest.compute_digest_file(algo, fn, use_mmap=opts.mmap) |
| 336 if compare_digests_equal(d, digest, algo): | 332 if compare_digests_equal(d, digest, algo): |
| 337 return ("ok", fn, tag) | 333 return ("ok", fn, tag) |
| 338 else: | 334 else: |
| 339 return ("failed", fn, tag) | 335 return ("failed", fn, tag) |
| 340 except EnvironmentError: | 336 except EnvironmentError: |
| 441 '*' if binary else ' ', | 437 '*' if binary else ' ', |
| 442 '-' if filename is None else util.normalize_filename(filename)), | 438 '-' if filename is None else util.normalize_filename(filename)), |
| 443 file=dest) | 439 file=dest) |
| 444 | 440 |
| 445 | 441 |
| 446 def compute_digest_file(hashobj, path, use_mmap=None): | |
| 447 """ | |
| 448 :param hashobj: a :mod:`hashlib` compatible hash algorithm type or factory | |
| 449 :param path: filename within the filesystem or a file descriptor opened in | |
| 450 binary mode (also a socket or pipe) | |
| 451 :param use_mmap: Use the :mod:`mmap` module if available. | |
| 452 If `None` determine automatically. | |
| 453 :type use_mmap: bool or None | |
| 454 :return: the digest in binary form | |
| 455 :rtype: bytes | |
| 456 | |
| 457 If a file descriptor is given, it must support :func:`os.read`. | |
| 458 | |
| 459 """ | |
| 460 h = hashobj() | |
| 461 if isinstance(path, constants.PATH_TYPES): | |
| 462 flags = os.O_RDONLY | getattr(os, "O_BINARY", 0) \ | |
| 463 | getattr(os, "O_SEQUENTIAL", 0) | getattr(os, "O_NOCTTY", 0) | |
| 464 fd = os.open(path, flags) | |
| 465 own_fd = True | |
| 466 else: | |
| 467 fd = path | |
| 468 own_fd = False | |
| 469 try: | |
| 470 try: | |
| 471 st = os.fstat(fd) | |
| 472 except TypeError: | |
| 473 # | |
| 474 # "fd" is most probably a Python socket object. | |
| 475 # (a pipe typically supports fstat) | |
| 476 # | |
| 477 use_mmap = False | |
| 478 else: | |
| 479 if stat.S_ISREG(st[stat.ST_MODE]): | |
| 480 filesize = st[stat.ST_SIZE] | |
| 481 if (use_mmap is None) \ | |
| 482 and (filesize > constants.MAX_AUTO_MAP_SIZE): | |
| 483 # | |
| 484 # This is borrowed from FreeBSD's cp(1) implementation: | |
| 485 # Mmap and process if less than 8M (the limit is | |
| 486 # so we don't totally trash memory on big files). | |
| 487 # This is really a minor hack, but it wins some | |
| 488 # CPU back. Some filesystems, such as smbnetfs, | |
| 489 # don't support mmap, so this is a best-effort | |
| 490 # attempt. | |
| 491 # | |
| 492 use_mmap = False | |
| 493 else: | |
| 494 use_mmap = False | |
| 495 if use_mmap is None: | |
| 496 use_mmap = True | |
| 497 if mmap is None or not use_mmap: | |
| 498 # No mmap available or wanted -> use traditional low-level file IO | |
| 499 fadvise = getattr(os, "posix_fadvise", None) | |
| 500 if fadvise: | |
| 501 fadvise(fd, 0, 0, os.POSIX_FADV_SEQUENTIAL) | |
| 502 if not constants.PY2: | |
| 503 fileobj = io.FileIO(fd, mode="r", closefd=False) | |
| 504 buf = bytearray(constants.READ_CHUNK_SIZE) | |
| 505 with memoryview(buf) as full_view: | |
| 506 while True: | |
| 507 try: | |
| 508 n = fileobj.readinto(buf) | |
| 509 except OSError as e: | |
| 510 if e.errno not in (errno.EAGAIN, | |
| 511 errno.EWOULDBLOCK, | |
| 512 errno.EINTR): | |
| 513 raise | |
| 514 else: | |
| 515 if n == 0: | |
| 516 break | |
| 517 if n == constants.READ_CHUNK_SIZE: | |
| 518 h.update(buf) | |
| 519 else: | |
| 520 with full_view[:n] as partial_view: | |
| 521 h.update(partial_view) | |
| 522 else: | |
| 523 while True: | |
| 524 try: | |
| 525 buf = os.read(fd, constants.READ_CHUNK_SIZE) | |
| 526 except OSError as e: | |
| 527 if e.errno not in (errno.EAGAIN, | |
| 528 errno.EWOULDBLOCK, | |
| 529 errno.EINTR): | |
| 530 raise | |
| 531 else: | |
| 532 if len(buf) == 0: | |
| 533 break | |
| 534 h.update(buf) | |
| 535 else: | |
| 536 # | |
| 537 # Use mmap | |
| 538 # | |
| 539 # NOTE: On Windows mmapped files with length 0 are not supported. | |
| 540 # So ensure to not call mmap.mmap() if the file size is 0. | |
| 541 # | |
| 542 madvise = getattr(mmap.mmap, "madvise", None) | |
| 543 if filesize <= constants.MAP_WINDOW_SIZE: | |
| 544 mapsize = filesize | |
| 545 else: | |
| 546 mapsize = constants.MAP_WINDOW_SIZE | |
| 547 mapoffset = 0 | |
| 548 rest = filesize | |
| 549 while rest > 0: | |
| 550 m = mmap.mmap(fd, | |
| 551 mapsize, | |
| 552 access=mmap.ACCESS_READ, | |
| 553 offset=mapoffset) | |
| 554 if madvise: | |
| 555 madvise(m, mmap.MADV_SEQUENTIAL) | |
| 556 try: | |
| 557 h.update(m) | |
| 558 finally: | |
| 559 m.close() | |
| 560 rest -= mapsize | |
| 561 mapoffset += mapsize | |
| 562 if rest < mapsize: | |
| 563 mapsize = rest | |
| 564 finally: | |
| 565 if own_fd: | |
| 566 os.close(fd) | |
| 567 return h.digest() | |
| 568 | |
| 569 | |
| 570 def compute_digest_stream(hashobj, instream): | |
| 571 """ | |
| 572 | |
| 573 :param hashobj: a :mod:`hashlib` compatible hash algorithm type or factory | |
| 574 :param instream: a bytes input stream to read the data to be hashed from | |
| 575 :return: the digest in binary form | |
| 576 :rtype: bytes | |
| 577 | |
| 578 """ | |
| 579 h = hashobj() | |
| 580 while True: | |
| 581 try: | |
| 582 buf = instream.read(constants.READ_CHUNK_SIZE) | |
| 583 except OSError as e: | |
| 584 if e.errno not in (errno.EAGAIN, errno.EWOULDBLOCK, errno.EINTR): | |
| 585 raise | |
| 586 else: | |
| 587 if buf is not None: | |
| 588 if len(buf) == 0: | |
| 589 break | |
| 590 h.update(buf) | |
| 591 return h.digest() | |
| 592 | |
| 593 | |
| 594 if __name__ == "__main__": | 442 if __name__ == "__main__": |
| 595 sys.exit(main()) | 443 sys.exit(main()) |
