Mercurial > hgrepos > Python > apps > py-cutils
comparison cutils/shasum.py @ 118:12339ac2148d
Move some functions into cutils.util (i.e. algorithms and their aliases)
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Sun, 29 Dec 2024 18:22:22 +0100 |
| parents | e51f34ad6d71 |
| children | dd4fe912d7e9 |
comparison
equal
deleted
inserted
replaced
| 117:e51f34ad6d71 | 118:12339ac2148d |
|---|---|
| 15 | 15 |
| 16 import argparse | 16 import argparse |
| 17 import base64 | 17 import base64 |
| 18 import binascii | 18 import binascii |
| 19 import errno | 19 import errno |
| 20 import hashlib | |
| 21 import io | 20 import io |
| 22 try: | 21 try: |
| 23 import mmap | 22 import mmap |
| 24 except ImportError: | 23 except ImportError: |
| 25 mmap = None | 24 mmap = None |
| 26 import os | 25 import os |
| 27 import re | 26 import re |
| 28 import stat | 27 import stat |
| 29 import sys | 28 import sys |
| 30 | 29 |
| 30 from . import (__version__, __revision__) | |
| 31 from . import util | |
| 31 from .util import constants | 32 from .util import constants |
| 32 from . import (__version__, __revision__) | |
| 33 | 33 |
| 34 | 34 |
| 35 def main(argv=None): | 35 def main(argv=None): |
| 36 aparser = argparse.ArgumentParser( | 36 aparser = argparse.ArgumentParser( |
| 37 description="Python implementation of shasum", | 37 description="Python implementation of shasum", |
| 38 fromfile_prefix_chars='@') | 38 fromfile_prefix_chars='@') |
| 39 aparser.add_argument( | 39 aparser.add_argument( |
| 40 "--algorithm", "-a", action="store", type=argv2algo, | 40 "--algorithm", "-a", action="store", type=util.argv2algo, |
| 41 help="1 (default), 224, 256, 384, 512, 3-224, 3-256, 3-384, 3-512, blake2b, blake2s, blake2, blake2-256, md5") | 41 help="1 (default), 224, 256, 384, 512, 3-224, 3-256, 3-384, 3-512, blake2b, blake2s, blake2, blake2-256, md5") |
| 42 aparser.add_argument( | 42 aparser.add_argument( |
| 43 "--base64", action="store_true", | 43 "--base64", action="store_true", |
| 44 help="Output checksums in base64 notation, not hexadecimal (OpenBSD).") | 44 help="Output checksums in base64 notation, not hexadecimal (OpenBSD).") |
| 45 aparser.add_argument( | 45 aparser.add_argument( |
| 107 print("ERROR: only one of --check or --checklist allowed", | 107 print("ERROR: only one of --check or --checklist allowed", |
| 108 file=sys.stderr) | 108 file=sys.stderr) |
| 109 sys.exit(64) # :manpage:`sysexits(3)` EX_USAGE | 109 sys.exit(64) # :manpage:`sysexits(3)` EX_USAGE |
| 110 | 110 |
| 111 if not opts.algorithm: | 111 if not opts.algorithm: |
| 112 opts.algorithm = argv2algo("1") | 112 opts.algorithm = util.argv2algo("1") |
| 113 | 113 |
| 114 opts.dest = None | 114 opts.dest = None |
| 115 | 115 |
| 116 return shasum(opts) | 116 return shasum(opts) |
| 117 | 117 |
| 123 if text_mode: | 123 if text_mode: |
| 124 raise ValueError("text mode not supported") | 124 raise ValueError("text mode not supported") |
| 125 if checklist and check: | 125 if checklist and check: |
| 126 raise ValueError("only one of `checklist' or `check' is allowed") | 126 raise ValueError("only one of `checklist' or `check' is allowed") |
| 127 opts = argparse.Namespace(files=files, | 127 opts = argparse.Namespace(files=files, |
| 128 algorithm=(algotag2algotype(algorithm), | 128 algorithm=(util.algotag2algotype(algorithm), |
| 129 algorithm), | 129 algorithm), |
| 130 bsd=bsd, | 130 bsd=bsd, |
| 131 checklist=checklist, | 131 checklist=checklist, |
| 132 check=check, | 132 check=check, |
| 133 text_mode=False, | 133 text_mode=False, |
| 344 def get_parsed_digest_line_from_checklist(checklist, opts, filename): | 344 def get_parsed_digest_line_from_checklist(checklist, opts, filename): |
| 345 if filename is None: | 345 if filename is None: |
| 346 filenames = ("-", "stdin", "", ) | 346 filenames = ("-", "stdin", "", ) |
| 347 else: | 347 else: |
| 348 filenames = ( | 348 filenames = ( |
| 349 normalize_filename(filename, strip_leading_dot_slash=True),) | 349 util.normalize_filename(filename, strip_leading_dot_slash=True),) |
| 350 with io.open(checklist, "rt", encoding="utf-8") as clf: | 350 with io.open(checklist, "rt", encoding="utf-8") as clf: |
| 351 for checkline in clf: | 351 for checkline in clf: |
| 352 if not checkline: | 352 if not checkline: |
| 353 continue | 353 continue |
| 354 parts = parse_digest_line(opts, checkline) | 354 parts = parse_digest_line(opts, checkline) |
| 356 raise ValueError( | 356 raise ValueError( |
| 357 "improperly formatted digest line: {}".format(checkline)) | 357 "improperly formatted digest line: {}".format(checkline)) |
| 358 if parts[0] in ("SIZE", "TIMESTAMP"): | 358 if parts[0] in ("SIZE", "TIMESTAMP"): |
| 359 assert opts.allow_distinfo | 359 assert opts.allow_distinfo |
| 360 continue | 360 continue |
| 361 fn = normalize_filename(parts[2], strip_leading_dot_slash=True) | 361 fn = util.normalize_filename(parts[2], strip_leading_dot_slash=True) |
| 362 if fn in filenames: | 362 if fn in filenames: |
| 363 return parts | 363 return parts |
| 364 else: | 364 else: |
| 365 return None | 365 return None |
| 366 | 366 |
| 387 # (tag, algorithm, filename, digest) | 387 # (tag, algorithm, filename, digest) |
| 388 if opts.allow_distinfo: | 388 if opts.allow_distinfo: |
| 389 if mo.group(1) == "SIZE": | 389 if mo.group(1) == "SIZE": |
| 390 return ("SIZE", None, None, mo.group(3)) | 390 return ("SIZE", None, None, mo.group(3)) |
| 391 return (mo.group(1), | 391 return (mo.group(1), |
| 392 algotag2algotype(mo.group(1)), | 392 util.algotag2algotype(mo.group(1)), |
| 393 mo.group(2), | 393 mo.group(2), |
| 394 mo.group(3)) | 394 mo.group(3)) |
| 395 else: | 395 else: |
| 396 if opts.allow_distinfo: | 396 if opts.allow_distinfo: |
| 397 mo = re.search(r"\ATIMESTAMP\s*=\s*([0-9]+)\s*\n\Z", line) | 397 mo = re.search(r"\ATIMESTAMP\s*=\s*([0-9]+)\s*\n\Z", line) |
| 408 mo.group(1)) | 408 mo.group(1)) |
| 409 else: | 409 else: |
| 410 return None | 410 return None |
| 411 | 411 |
| 412 | 412 |
| 413 def get_blake2b(): | |
| 414 """Get the factory for blake2b""" | |
| 415 try: | |
| 416 return hashlib.blake2b | |
| 417 except AttributeError: | |
| 418 import pyblake2 | |
| 419 return pyblake2.blake2b | |
| 420 | |
| 421 | |
| 422 def get_blake2s(): | |
| 423 """Get the factory for blake2s""" | |
| 424 try: | |
| 425 return hashlib.blake2s | |
| 426 except AttributeError: | |
| 427 import pyblake2 | |
| 428 return pyblake2.blake2s | |
| 429 | |
| 430 | |
| 431 def get_blake2_256(): | |
| 432 """Get the factory for blake2-256""" | |
| 433 | |
| 434 try: | |
| 435 hashlib.blake2b | |
| 436 except AttributeError: | |
| 437 import pyblake2 | |
| 438 | |
| 439 def _get_blake(): | |
| 440 return pyblake2.blake2b(digest_size=32) | |
| 441 | |
| 442 else: | |
| 443 | |
| 444 def _get_blake(): | |
| 445 return hashlib.blake2b(digest_size=32) | |
| 446 | |
| 447 return _get_blake | |
| 448 | |
| 449 | |
| 450 def argv2algo(s): | |
| 451 """Convert a command line algorithm specifier into a tuple with the | |
| 452 type/factory of the digest and the algorithms tag for output purposes. | |
| 453 | |
| 454 :param str s: the specifier from the command line | |
| 455 :return: the internal digest specification | |
| 456 :rtype: a tuple (digest_type_or_factory, name_in_output) | |
| 457 | |
| 458 String comparisons are done case-insensitively. | |
| 459 | |
| 460 """ | |
| 461 s = s.lower() | |
| 462 if s in ("1", "sha1"): | |
| 463 return (hashlib.sha1, "SHA1") | |
| 464 elif s in ("224", "sha224"): | |
| 465 return (hashlib.sha224, "SHA224") | |
| 466 elif s in ("256", "sha256"): | |
| 467 return (hashlib.sha256, "SHA256") | |
| 468 elif s in ("384", "sha384"): | |
| 469 return (hashlib.sha384, "SHA384") | |
| 470 elif s in ("512", "sha512"): | |
| 471 return (hashlib.sha512, "SHA512") | |
| 472 elif s in ("3-224", "sha3-224"): | |
| 473 return (hashlib.sha3_224, "SHA3-224") | |
| 474 elif s in ("3-256", "sha3-256"): | |
| 475 return (hashlib.sha3_256, "SHA3-256") | |
| 476 elif s in ("3-384", "sha3-384"): | |
| 477 return (hashlib.sha3_384, "SHA3-384") | |
| 478 elif s in ("3-512", "sha3-512"): | |
| 479 return (hashlib.sha3_512, "SHA3-512") | |
| 480 elif s in ("blake2b", "blake2b-512", "blake2", "blake2-512"): | |
| 481 return (get_blake2b(), "BLAKE2b") | |
| 482 elif s in ("blake2s", "blake2s-256"): | |
| 483 return (get_blake2s(), "BLAKE2s") | |
| 484 elif s in ("blake2-256", "blake2b-256"): | |
| 485 return (get_blake2_256(), "BLAKE2b-256") | |
| 486 elif s == "md5": | |
| 487 return (hashlib.md5, "MD5") | |
| 488 else: | |
| 489 raise argparse.ArgumentTypeError( | |
| 490 "`{}' is not a recognized algorithm".format(s)) | |
| 491 | |
| 492 | |
| 493 def algotag2algotype(s): | |
| 494 """Convert the algorithm specifier in a BSD-style digest file to the | |
| 495 type/factory of the corresponding algorithm. | |
| 496 | |
| 497 :param str s: the tag (i.e. normalized name) or the algorithm | |
| 498 :return: the digest type or factory for `s` | |
| 499 | |
| 500 All string comparisons are case-sensitive. | |
| 501 | |
| 502 """ | |
| 503 if s == "SHA1": | |
| 504 return hashlib.sha1 | |
| 505 elif s == "SHA224": | |
| 506 return hashlib.sha224 | |
| 507 elif s == "SHA256": | |
| 508 return hashlib.sha256 | |
| 509 elif s == "SHA384": | |
| 510 return hashlib.sha384 | |
| 511 elif s == "SHA512": | |
| 512 return hashlib.sha512 | |
| 513 elif s == "SHA3-224": | |
| 514 return hashlib.sha3_224 | |
| 515 elif s == "SHA3-256": | |
| 516 return hashlib.sha3_256 | |
| 517 elif s == "SHA3-384": | |
| 518 return hashlib.sha3_384 | |
| 519 elif s == "SHA3-512": | |
| 520 return hashlib.sha3_512 | |
| 521 elif s in ("BLAKE2b", "BLAKE2b-512", "BLAKE2b512"): # compat for openssl | |
| 522 return get_blake2b() | |
| 523 elif s in ("BLAKE2s", "BLAKE2s-256", "BLAKE2s256"): # compat for openssl | |
| 524 return get_blake2s() | |
| 525 elif s in ("BLAKE2b-256", "BLAKE2b256"): # also compat for openssl dgst | |
| 526 return get_blake2_256() | |
| 527 elif s == "MD5": | |
| 528 return hashlib.md5 | |
| 529 else: | |
| 530 raise ValueError("unknown algorithm: {}".format(s)) | |
| 531 | |
| 532 | |
| 533 def out_bsd(dest, digest, filename, digestname, binary, use_base64): | 413 def out_bsd(dest, digest, filename, digestname, binary, use_base64): |
| 534 """BSD format output, also :command:`openssl dgst` and | 414 """BSD format output, also :command:`openssl dgst` and |
| 535 :command:`b2sum --tag` format output | 415 :command:`b2sum --tag` format output |
| 536 | 416 |
| 537 """ | 417 """ |
| 541 digest = binascii.hexlify(digest).decode("ascii") | 421 digest = binascii.hexlify(digest).decode("ascii") |
| 542 if filename is None: | 422 if filename is None: |
| 543 print(digest, file=dest) | 423 print(digest, file=dest) |
| 544 else: | 424 else: |
| 545 print("{} ({}) = {}".format(digestname, | 425 print("{} ({}) = {}".format(digestname, |
| 546 normalize_filename(filename), | 426 util.normalize_filename(filename), |
| 547 digest), | 427 digest), |
| 548 file=dest) | 428 file=dest) |
| 549 | 429 |
| 550 | 430 |
| 551 def out_std(dest, digest, filename, digestname, binary, use_base64): | 431 def out_std(dest, digest, filename, digestname, binary, use_base64): |
| 557 else: | 437 else: |
| 558 digest = binascii.hexlify(digest).decode("ascii") | 438 digest = binascii.hexlify(digest).decode("ascii") |
| 559 print("{} {}{}".format( | 439 print("{} {}{}".format( |
| 560 digest, | 440 digest, |
| 561 '*' if binary else ' ', | 441 '*' if binary else ' ', |
| 562 '-' if filename is None else normalize_filename(filename)), | 442 '-' if filename is None else util.normalize_filename(filename)), |
| 563 file=dest) | 443 file=dest) |
| 564 | 444 |
| 565 | 445 |
| 566 def compute_digest_file(hashobj, path, use_mmap=None): | 446 def compute_digest_file(hashobj, path, use_mmap=None): |
| 567 """ | 447 """ |
| 709 break | 589 break |
| 710 h.update(buf) | 590 h.update(buf) |
| 711 return h.digest() | 591 return h.digest() |
| 712 | 592 |
| 713 | 593 |
| 714 def normalize_filename(filename, strip_leading_dot_slash=False): | |
| 715 filename = filename.replace("\\", "/") | |
| 716 if strip_leading_dot_slash: | |
| 717 while filename.startswith("./"): | |
| 718 filename = filename[2:] | |
| 719 return filename | |
| 720 | |
| 721 | |
| 722 if __name__ == "__main__": | 594 if __name__ == "__main__": |
| 723 sys.exit(main()) | 595 sys.exit(main()) |
