comparison cutils/shasum.py @ 118:12339ac2148d

Move some functions into cutils.util (i.e. algorithms and their aliases)
author Franz Glasner <fzglas.hg@dom66.de>
date Sun, 29 Dec 2024 18:22:22 +0100
parents e51f34ad6d71
children dd4fe912d7e9
comparison
equal deleted inserted replaced
117:e51f34ad6d71 118:12339ac2148d
15 15
16 import argparse 16 import argparse
17 import base64 17 import base64
18 import binascii 18 import binascii
19 import errno 19 import errno
20 import hashlib
21 import io 20 import io
22 try: 21 try:
23 import mmap 22 import mmap
24 except ImportError: 23 except ImportError:
25 mmap = None 24 mmap = None
26 import os 25 import os
27 import re 26 import re
28 import stat 27 import stat
29 import sys 28 import sys
30 29
30 from . import (__version__, __revision__)
31 from . import util
31 from .util import constants 32 from .util import constants
32 from . import (__version__, __revision__)
33 33
34 34
35 def main(argv=None): 35 def main(argv=None):
36 aparser = argparse.ArgumentParser( 36 aparser = argparse.ArgumentParser(
37 description="Python implementation of shasum", 37 description="Python implementation of shasum",
38 fromfile_prefix_chars='@') 38 fromfile_prefix_chars='@')
39 aparser.add_argument( 39 aparser.add_argument(
40 "--algorithm", "-a", action="store", type=argv2algo, 40 "--algorithm", "-a", action="store", type=util.argv2algo,
41 help="1 (default), 224, 256, 384, 512, 3-224, 3-256, 3-384, 3-512, blake2b, blake2s, blake2, blake2-256, md5") 41 help="1 (default), 224, 256, 384, 512, 3-224, 3-256, 3-384, 3-512, blake2b, blake2s, blake2, blake2-256, md5")
42 aparser.add_argument( 42 aparser.add_argument(
43 "--base64", action="store_true", 43 "--base64", action="store_true",
44 help="Output checksums in base64 notation, not hexadecimal (OpenBSD).") 44 help="Output checksums in base64 notation, not hexadecimal (OpenBSD).")
45 aparser.add_argument( 45 aparser.add_argument(
107 print("ERROR: only one of --check or --checklist allowed", 107 print("ERROR: only one of --check or --checklist allowed",
108 file=sys.stderr) 108 file=sys.stderr)
109 sys.exit(64) # :manpage:`sysexits(3)` EX_USAGE 109 sys.exit(64) # :manpage:`sysexits(3)` EX_USAGE
110 110
111 if not opts.algorithm: 111 if not opts.algorithm:
112 opts.algorithm = argv2algo("1") 112 opts.algorithm = util.argv2algo("1")
113 113
114 opts.dest = None 114 opts.dest = None
115 115
116 return shasum(opts) 116 return shasum(opts)
117 117
123 if text_mode: 123 if text_mode:
124 raise ValueError("text mode not supported") 124 raise ValueError("text mode not supported")
125 if checklist and check: 125 if checklist and check:
126 raise ValueError("only one of `checklist' or `check' is allowed") 126 raise ValueError("only one of `checklist' or `check' is allowed")
127 opts = argparse.Namespace(files=files, 127 opts = argparse.Namespace(files=files,
128 algorithm=(algotag2algotype(algorithm), 128 algorithm=(util.algotag2algotype(algorithm),
129 algorithm), 129 algorithm),
130 bsd=bsd, 130 bsd=bsd,
131 checklist=checklist, 131 checklist=checklist,
132 check=check, 132 check=check,
133 text_mode=False, 133 text_mode=False,
344 def get_parsed_digest_line_from_checklist(checklist, opts, filename): 344 def get_parsed_digest_line_from_checklist(checklist, opts, filename):
345 if filename is None: 345 if filename is None:
346 filenames = ("-", "stdin", "", ) 346 filenames = ("-", "stdin", "", )
347 else: 347 else:
348 filenames = ( 348 filenames = (
349 normalize_filename(filename, strip_leading_dot_slash=True),) 349 util.normalize_filename(filename, strip_leading_dot_slash=True),)
350 with io.open(checklist, "rt", encoding="utf-8") as clf: 350 with io.open(checklist, "rt", encoding="utf-8") as clf:
351 for checkline in clf: 351 for checkline in clf:
352 if not checkline: 352 if not checkline:
353 continue 353 continue
354 parts = parse_digest_line(opts, checkline) 354 parts = parse_digest_line(opts, checkline)
356 raise ValueError( 356 raise ValueError(
357 "improperly formatted digest line: {}".format(checkline)) 357 "improperly formatted digest line: {}".format(checkline))
358 if parts[0] in ("SIZE", "TIMESTAMP"): 358 if parts[0] in ("SIZE", "TIMESTAMP"):
359 assert opts.allow_distinfo 359 assert opts.allow_distinfo
360 continue 360 continue
361 fn = normalize_filename(parts[2], strip_leading_dot_slash=True) 361 fn = util.normalize_filename(parts[2], strip_leading_dot_slash=True)
362 if fn in filenames: 362 if fn in filenames:
363 return parts 363 return parts
364 else: 364 else:
365 return None 365 return None
366 366
387 # (tag, algorithm, filename, digest) 387 # (tag, algorithm, filename, digest)
388 if opts.allow_distinfo: 388 if opts.allow_distinfo:
389 if mo.group(1) == "SIZE": 389 if mo.group(1) == "SIZE":
390 return ("SIZE", None, None, mo.group(3)) 390 return ("SIZE", None, None, mo.group(3))
391 return (mo.group(1), 391 return (mo.group(1),
392 algotag2algotype(mo.group(1)), 392 util.algotag2algotype(mo.group(1)),
393 mo.group(2), 393 mo.group(2),
394 mo.group(3)) 394 mo.group(3))
395 else: 395 else:
396 if opts.allow_distinfo: 396 if opts.allow_distinfo:
397 mo = re.search(r"\ATIMESTAMP\s*=\s*([0-9]+)\s*\n\Z", line) 397 mo = re.search(r"\ATIMESTAMP\s*=\s*([0-9]+)\s*\n\Z", line)
408 mo.group(1)) 408 mo.group(1))
409 else: 409 else:
410 return None 410 return None
411 411
412 412
413 def get_blake2b():
414 """Get the factory for blake2b"""
415 try:
416 return hashlib.blake2b
417 except AttributeError:
418 import pyblake2
419 return pyblake2.blake2b
420
421
422 def get_blake2s():
423 """Get the factory for blake2s"""
424 try:
425 return hashlib.blake2s
426 except AttributeError:
427 import pyblake2
428 return pyblake2.blake2s
429
430
431 def get_blake2_256():
432 """Get the factory for blake2-256"""
433
434 try:
435 hashlib.blake2b
436 except AttributeError:
437 import pyblake2
438
439 def _get_blake():
440 return pyblake2.blake2b(digest_size=32)
441
442 else:
443
444 def _get_blake():
445 return hashlib.blake2b(digest_size=32)
446
447 return _get_blake
448
449
450 def argv2algo(s):
451 """Convert a command line algorithm specifier into a tuple with the
452 type/factory of the digest and the algorithms tag for output purposes.
453
454 :param str s: the specifier from the command line
455 :return: the internal digest specification
456 :rtype: a tuple (digest_type_or_factory, name_in_output)
457
458 String comparisons are done case-insensitively.
459
460 """
461 s = s.lower()
462 if s in ("1", "sha1"):
463 return (hashlib.sha1, "SHA1")
464 elif s in ("224", "sha224"):
465 return (hashlib.sha224, "SHA224")
466 elif s in ("256", "sha256"):
467 return (hashlib.sha256, "SHA256")
468 elif s in ("384", "sha384"):
469 return (hashlib.sha384, "SHA384")
470 elif s in ("512", "sha512"):
471 return (hashlib.sha512, "SHA512")
472 elif s in ("3-224", "sha3-224"):
473 return (hashlib.sha3_224, "SHA3-224")
474 elif s in ("3-256", "sha3-256"):
475 return (hashlib.sha3_256, "SHA3-256")
476 elif s in ("3-384", "sha3-384"):
477 return (hashlib.sha3_384, "SHA3-384")
478 elif s in ("3-512", "sha3-512"):
479 return (hashlib.sha3_512, "SHA3-512")
480 elif s in ("blake2b", "blake2b-512", "blake2", "blake2-512"):
481 return (get_blake2b(), "BLAKE2b")
482 elif s in ("blake2s", "blake2s-256"):
483 return (get_blake2s(), "BLAKE2s")
484 elif s in ("blake2-256", "blake2b-256"):
485 return (get_blake2_256(), "BLAKE2b-256")
486 elif s == "md5":
487 return (hashlib.md5, "MD5")
488 else:
489 raise argparse.ArgumentTypeError(
490 "`{}' is not a recognized algorithm".format(s))
491
492
493 def algotag2algotype(s):
494 """Convert the algorithm specifier in a BSD-style digest file to the
495 type/factory of the corresponding algorithm.
496
497 :param str s: the tag (i.e. normalized name) of the algorithm
498 :return: the digest type or factory for `s`
499
500 All string comparisons are case-sensitive.
501
502 """
503 if s == "SHA1":
504 return hashlib.sha1
505 elif s == "SHA224":
506 return hashlib.sha224
507 elif s == "SHA256":
508 return hashlib.sha256
509 elif s == "SHA384":
510 return hashlib.sha384
511 elif s == "SHA512":
512 return hashlib.sha512
513 elif s == "SHA3-224":
514 return hashlib.sha3_224
515 elif s == "SHA3-256":
516 return hashlib.sha3_256
517 elif s == "SHA3-384":
518 return hashlib.sha3_384
519 elif s == "SHA3-512":
520 return hashlib.sha3_512
521 elif s in ("BLAKE2b", "BLAKE2b-512", "BLAKE2b512"): # compat for openssl
522 return get_blake2b()
523 elif s in ("BLAKE2s", "BLAKE2s-256", "BLAKE2s256"): # compat for openssl
524 return get_blake2s()
525 elif s in ("BLAKE2b-256", "BLAKE2b256"): # also compat for openssl dgst
526 return get_blake2_256()
527 elif s == "MD5":
528 return hashlib.md5
529 else:
530 raise ValueError("unknown algorithm: {}".format(s))
531
532
533 def out_bsd(dest, digest, filename, digestname, binary, use_base64): 413 def out_bsd(dest, digest, filename, digestname, binary, use_base64):
534 """BSD format output, also :command:`openssl dgst` and 414 """BSD format output, also :command:`openssl dgst` and
535 :command:`b2sum --tag` format output 415 :command:`b2sum --tag` format output
536 416
537 """ 417 """
541 digest = binascii.hexlify(digest).decode("ascii") 421 digest = binascii.hexlify(digest).decode("ascii")
542 if filename is None: 422 if filename is None:
543 print(digest, file=dest) 423 print(digest, file=dest)
544 else: 424 else:
545 print("{} ({}) = {}".format(digestname, 425 print("{} ({}) = {}".format(digestname,
546 normalize_filename(filename), 426 util.normalize_filename(filename),
547 digest), 427 digest),
548 file=dest) 428 file=dest)
549 429
550 430
551 def out_std(dest, digest, filename, digestname, binary, use_base64): 431 def out_std(dest, digest, filename, digestname, binary, use_base64):
557 else: 437 else:
558 digest = binascii.hexlify(digest).decode("ascii") 438 digest = binascii.hexlify(digest).decode("ascii")
559 print("{} {}{}".format( 439 print("{} {}{}".format(
560 digest, 440 digest,
561 '*' if binary else ' ', 441 '*' if binary else ' ',
562 '-' if filename is None else normalize_filename(filename)), 442 '-' if filename is None else util.normalize_filename(filename)),
563 file=dest) 443 file=dest)
564 444
565 445
566 def compute_digest_file(hashobj, path, use_mmap=None): 446 def compute_digest_file(hashobj, path, use_mmap=None):
567 """ 447 """
709 break 589 break
710 h.update(buf) 590 h.update(buf)
711 return h.digest() 591 return h.digest()
712 592
713 593
714 def normalize_filename(filename, strip_leading_dot_slash=False):
715 filename = filename.replace("\\", "/")
716 if strip_leading_dot_slash:
717 while filename.startswith("./"):
718 filename = filename[2:]
719 return filename
720
721
722 if __name__ == "__main__": 594 if __name__ == "__main__":
723 sys.exit(main()) 595 sys.exit(main())