Mercurial > hgrepos > Python > apps > py-cutils
comparison cutils/treesum.py @ 204:07f1d79e6674
Fully implemented UTF-8 mode for treeview.
While doing this refactored "normal" mode (using the filesystem encoding).
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Tue, 21 Jan 2025 20:31:48 +0100 |
| parents | b9b38584919b |
| children | 63088d3675bb |
comparison
equal
deleted
inserted
replaced
| 203:3a85f7bbe0b1 | 204:07f1d79e6674 |
|---|---|
| 404 self._outfp.write( | 404 self._outfp.write( |
| 405 format_bsd_line("COMMENT", None, line, False)) | 405 format_bsd_line("COMMENT", None, line, False)) |
| 406 | 406 |
| 407 if self._minimal is not None: | 407 if self._minimal is not None: |
| 408 self._outfp.write(format_bsd_line( | 408 self._outfp.write(format_bsd_line( |
| 409 "ROOT", None, self._minimal if self._minimal else "", False)) | 409 "ROOT", |
| 410 else: | 410 None, |
| 411 self._outfp.write(format_bsd_line("ROOT", None, root, False)) | 411 (walk.WalkDirEntry.alt_u8(self._minimal) |
| 412 if self._minimal else b""), | |
| 413 False)) | |
| 414 else: | |
| 415 self._outfp.write(format_bsd_line( | |
| 416 "ROOT", None, walk.WalkDirEntry.alt_u8(root), False)) | |
| 412 self._outfp.flush() | 417 self._outfp.flush() |
| 413 | 418 |
| 414 if not self._handle_root_logical and os.path.islink(root): | 419 if not self._handle_root_logical and os.path.islink(root): |
| 415 linktgt = walk.WalkDirEntry.from_readlink(os.readlink(root)) | 420 linktgt = walk.WalkDirEntry.from_readlink(os.readlink(root)) |
| 416 linkdgst = self._algorithm[0]() | 421 linkdgst = self._algorithm[0]() |
| 455 fsobjects.sort(key=walk.WalkDirEntry.alt_sort_key) | 460 fsobjects.sort(key=walk.WalkDirEntry.alt_sort_key) |
| 456 else: | 461 else: |
| 457 fsobjects.sort(key=walk.WalkDirEntry.sort_key) | 462 fsobjects.sort(key=walk.WalkDirEntry.sort_key) |
| 458 dir_dgst = self._algorithm[0]() | 463 dir_dgst = self._algorithm[0]() |
| 459 dir_size = 0 | 464 dir_size = 0 |
| 465 dir_tainted = False | |
| 460 for fso in fsobjects: | 466 for fso in fsobjects: |
| 461 if fso.is_dir: | 467 if fso.is_dir: |
| 462 if fso.is_symlink and not self._follow_directory_symlinks: | 468 if fso.is_symlink and not self._follow_directory_symlinks: |
| 463 linktgt = walk.WalkDirEntry.from_readlink( | 469 linktgt = walk.WalkDirEntry.from_readlink( |
| 464 os.readlink(fso.path)) | 470 os.readlink(fso.path)) |
| 465 # linktgt = util.fsencode(os.readlink(fso.path))) | 471 # linktgt = util.fsencode(os.readlink(fso.path))) |
| 466 linkdgst = self._algorithm[0]() | 472 linkdgst = self._algorithm[0]() |
| 467 linkdgst.update( | 473 if self._utf8_mode: |
| 468 util.interpolate_bytes( | 474 if linktgt.u8path is None: |
| 469 b"%d:%s,", len(linktgt.fspath), linktgt.fspath)) | 475 dir_tainted = True |
| 470 dir_dgst.update(util.interpolate_bytes( | 476 linkdgst.update(util.interpolate_bytes( |
| 471 b"1:S,%d:%s,", len(fso.fsname), fso.fsname)) | 477 b"%d:%s,", |
| 478 len(linktgt.alt_u8path), | |
| 479 linktgt.alt_u8path)) | |
| 480 else: | |
| 481 linkdgst.update(util.interpolate_bytes( | |
| 482 b"%d:%s,", | |
| 483 len(linktgt.u8path), | |
| 484 linktgt.u8path)) | |
| 485 if fso.u8name is None: | |
| 486 dir_tainted = True | |
| 487 dir_dgst.update(util.interpolate_bytes( | |
| 488 b"1:S,%d:%s,", | |
| 489 len(fso.alt_u8name), | |
| 490 fso.alt_u8name)) | |
| 491 else: | |
| 492 dir_dgst.update(util.interpolate_bytes( | |
| 493 b"1:S,%d:%s,", len(fso.u8name), fso.u8name)) | |
| 494 else: | |
| 495 if linktgt.fspath is None: | |
| 496 dir_tainted = True | |
| 497 linkdgst.update(util.interpolate_bytes( | |
| 498 b"%d:%s,", | |
| 499 len(linktgt.alt_fspath), | |
| 500 linktgt.alt_fspath)) | |
| 501 else: | |
| 502 linkdgst.update(util.interpolate_bytes( | |
| 503 b"%d:%s,", | |
| 504 len(linktgt.fspath), | |
| 505 linktgt.fspath)) | |
| 506 if fso.fsname is None: | |
| 507 dir_tainted = True | |
| 508 dir_dgst.update(util.interpolate_bytes( | |
| 509 b"1:S,%d:%s,", | |
| 510 len(fso.alt_fsname), | |
| 511 fso.alt_fsname)) | |
| 512 else: | |
| 513 dir_dgst.update(util.interpolate_bytes( | |
| 514 b"1:S,%d:%s,", len(fso.fsname), fso.fsname)) | |
| 472 # | 515 # |
| 473 # - no mtime and no mode for symlinks | 516 # - no mtime and no mode for symlinks |
| 474 # - also does not count for dir_size | 517 # - also does not count for dir_size |
| 475 # | 518 # |
| 476 dir_dgst.update(util.interpolate_bytes( | 519 dir_dgst.update(util.interpolate_bytes( |
| 477 b"%d:%s,", | 520 b"%d:%s,", |
| 478 len(linkdgst.digest()), linkdgst.digest())) | 521 len(linkdgst.digest()), linkdgst.digest())) |
| 479 opath = "/".join(top) + "/" + fso.name if top else fso.name | 522 opath = join_output_path(top, fso.name) |
| 523 if self._utf8_mode: | |
| 524 opath = walk.WalkDirEntry.alt_u8(opath) | |
| 525 else: | |
| 526 opath = walk.WalkDirEntry.alt_fs(opath) | |
| 480 if self._size_only: | 527 if self._size_only: |
| 481 self._outfp.write(format_bsd_line( | 528 self._outfp.write(format_bsd_line( |
| 482 "SIZE", None, "%s/./@/" % (opath,), False, 0)) | 529 "SIZE", None, "%s/./@/" % (opath,), False, 0)) |
| 483 else: | 530 else: |
| 484 self._outfp.write(format_bsd_line( | 531 self._outfp.write(format_bsd_line( |
| 495 # Get subdir data from recursing into it | 542 # Get subdir data from recursing into it |
| 496 sub_dir_dgst, sub_dir_size = self._generate( | 543 sub_dir_dgst, sub_dir_size = self._generate( |
| 497 root, top + (fso.name, )) | 544 root, top + (fso.name, )) |
| 498 | 545 |
| 499 dir_size += sub_dir_size | 546 dir_size += sub_dir_size |
| 500 dir_dgst.update(util.interpolate_bytes( | 547 if self._utf8_mode: |
| 501 b"1:d,%d:%s,", len(fso.fsname), fso.fsname)) | 548 if fso.u8name is None: |
| 549 dir_tainted = True | |
| 550 dir_dgst.update(util.interpolate_bytes( | |
| 551 b"1:d,%d:%s,", | |
| 552 len(fso.alt_u8name), | |
| 553 fso.alt_u8name)) | |
| 554 else: | |
| 555 dir_dgst.update(util.interpolate_bytes( | |
| 556 b"1:d,%d:%s,", len(fso.u8name), fso.u8name)) | |
| 557 else: | |
| 558 if fso.fsname is None: | |
| 559 dir_tainted = True | |
| 560 dir_dgst.update(util.interpolate_bytes( | |
| 561 b"1:d,%d:%s,", | |
| 562 len(fso.alt_fsname), | |
| 563 fso.alt_fsname)) | |
| 564 else: | |
| 565 dir_dgst.update(util.interpolate_bytes( | |
| 566 b"1:d,%d:%s,", len(fso.fsname), fso.fsname)) | |
| 502 dir_dgst.update(util.interpolate_bytes( | 567 dir_dgst.update(util.interpolate_bytes( |
| 503 b"%d:%s,", len(sub_dir_dgst), sub_dir_dgst)) | 568 b"%d:%s,", len(sub_dir_dgst), sub_dir_dgst)) |
| 504 if self._with_metadata_full_mode: | 569 if self._with_metadata_full_mode: |
| 505 modestr = util.b(normalized_mode_str(fso.stat.st_mode)) | 570 modestr = util.b(normalized_mode_str(fso.stat.st_mode)) |
| 506 dir_dgst.update(util.interpolate_bytes( | 571 dir_dgst.update(util.interpolate_bytes( |
| 509 modestr = util.b(normalized_compatible_mode_str( | 574 modestr = util.b(normalized_compatible_mode_str( |
| 510 fso.stat.st_mode)) | 575 fso.stat.st_mode)) |
| 511 dir_dgst.update(util.interpolate_bytes( | 576 dir_dgst.update(util.interpolate_bytes( |
| 512 b"4:mode,%d:%s,", len(modestr), modestr)) | 577 b"4:mode,%d:%s,", len(modestr), modestr)) |
| 513 else: | 578 else: |
| 514 dir_dgst.update(util.interpolate_bytes( | 579 if self._utf8_mode: |
| 515 b"1:f,%d:%s,", len(fso.fsname), fso.fsname)) | 580 if fso.u8name is None: |
| 581 dir_tainted = True | |
| 582 dir_dgst.update(util.interpolate_bytes( | |
| 583 b"1:f,%d:%s,", | |
| 584 len(fso.alt_u8name), | |
| 585 fso.alt_u8name)) | |
| 586 else: | |
| 587 dir_dgst.update(util.interpolate_bytes( | |
| 588 b"1:f,%d:%s,", len(fso.u8name), fso.u8name)) | |
| 589 else: | |
| 590 if fso.fsname is None: | |
| 591 dir_tainted = True | |
| 592 dir_dgst.update(util.interpolate_bytes( | |
| 593 b"1:f,%d:%s,", | |
| 594 len(fso.alt_fsname), | |
| 595 fso.alt_fsname)) | |
| 596 else: | |
| 597 dir_dgst.update(util.interpolate_bytes( | |
| 598 b"1:f,%d:%s,", len(fso.fsname), fso.fsname)) | |
| 516 dir_size += fso.stat.st_size | 599 dir_size += fso.stat.st_size |
| 517 if self._with_metadata_mtime: | 600 if self._with_metadata_mtime: |
| 518 mtime = datetime.datetime.utcfromtimestamp( | 601 mtime = datetime.datetime.utcfromtimestamp( |
| 519 int(fso.stat.st_mtime)) | 602 int(fso.stat.st_mtime)) |
| 520 mtime = util.b(mtime.isoformat("T") + "Z") | 603 mtime = util.b(mtime.isoformat("T") + "Z") |
| 532 if not self._size_only: | 615 if not self._size_only: |
| 533 dgst = digest.compute_digest_file( | 616 dgst = digest.compute_digest_file( |
| 534 self._algorithm[0], fso.path, use_mmap=self._use_mmap) | 617 self._algorithm[0], fso.path, use_mmap=self._use_mmap) |
| 535 dir_dgst.update(util.interpolate_bytes( | 618 dir_dgst.update(util.interpolate_bytes( |
| 536 b"%d:%s,", len(dgst), dgst)) | 619 b"%d:%s,", len(dgst), dgst)) |
| 537 opath = "/".join(top) + "/" + fso.name if top else fso.name | 620 opath = join_output_path(top, fso.name) |
| 621 if self._utf8_mode: | |
| 622 opath = walk.WalkDirEntry.alt_u8(opath) | |
| 623 else: | |
| 624 opath = walk.WalkDirEntry.alt_fs(opath) | |
| 538 if self._size_only: | 625 if self._size_only: |
| 539 self._outfp.write(format_bsd_line( | 626 self._outfp.write(format_bsd_line( |
| 540 "SIZE", None, opath, False, fso.stat.st_size)) | 627 "SIZE", None, opath, False, fso.stat.st_size)) |
| 541 else: | 628 else: |
| 542 if self._print_size: | 629 if self._print_size: |
| 546 else: | 633 else: |
| 547 self._outfp.write(format_bsd_line( | 634 self._outfp.write(format_bsd_line( |
| 548 self._algorithm[1], dgst, opath, | 635 self._algorithm[1], dgst, opath, |
| 549 self._use_base64)) | 636 self._use_base64)) |
| 550 self._outfp.flush() | 637 self._outfp.flush() |
| 551 | 638 opath = join_output_path(top, None) |
| 552 opath = "/".join(top) + "/" if top else "" | 639 if opath: |
| 640 if self._utf8_mode: | |
| 641 opath = walk.WalkDirEntry.alt_u8(opath) | |
| 642 else: | |
| 643 opath = walk.WalkDirEntry.alt_fs(opath) | |
| 553 if self._size_only: | 644 if self._size_only: |
| 554 self._outfp.write(format_bsd_line( | 645 self._outfp.write(format_bsd_line( |
| 555 "SIZE", None, opath, False, dir_size)) | 646 "SIZE", None, opath, False, dir_size)) |
| 556 else: | 647 else: |
| 648 if dir_tainted: | |
| 649 self._outfp.write(format_bsd_line( | |
| 650 b"ERROR", None, b"directory is tainted", False, None)) | |
| 557 if self._print_size: | 651 if self._print_size: |
| 558 self._outfp.write(format_bsd_line( | 652 self._outfp.write(format_bsd_line( |
| 559 self._algorithm[1], dir_dgst.digest(), opath, | 653 self._algorithm[1], dir_dgst.digest(), opath, |
| 560 self._use_base64, dir_size)) | 654 self._use_base64, dir_size)) |
| 561 else: | 655 else: |
| 562 self._outfp.write(format_bsd_line( | 656 self._outfp.write(format_bsd_line( |
| 563 self._algorithm[1], dir_dgst.digest(), opath, | 657 self._algorithm[1], dir_dgst.digest(), opath, |
| 564 self._use_base64)) | 658 self._use_base64)) |
| 565 self._outfp.flush() | 659 self._outfp.flush() |
| 566 return (dir_dgst.digest(), dir_size) | 660 return (dir_dgst.digest(), dir_size) |
| 661 | |
| 662 | |
| 663 def join_output_path(top, name): | |
| 664 if name is None: | |
| 665 # a path for a directory is to be computed | |
| 666 if top: | |
| 667 if isinstance(top[0], bytes): | |
| 668 return b"/".join(top) + b"/" | |
| 669 else: | |
| 670 return u"/".join(top) + u"/" | |
| 671 else: | |
| 672 return b"" | |
| 673 else: | |
| 674 # a path for a normal file is to be computed | |
| 675 if top: | |
| 676 if isinstance(name, bytes): | |
| 677 return b"/".join(top) + b"/" + name | |
| 678 else: | |
| 679 return u"/".join(top) + u"/" + name | |
| 680 else: | |
| 681 return name | |
| 567 | 682 |
| 568 | 683 |
| 569 class CRC32Output(object): | 684 class CRC32Output(object): |
| 570 | 685 |
| 571 """Wrapper for a minimal binary file contextmanager that calculates | 686 """Wrapper for a minimal binary file contextmanager that calculates |
| 650 if what in (b"FSENCODING", b"ISOTIMESTAMP", b"FLAGS", b"VERSION", | 765 if what in (b"FSENCODING", b"ISOTIMESTAMP", b"FLAGS", b"VERSION", |
| 651 b"CRC32"): | 766 b"CRC32"): |
| 652 assert filename is None | 767 assert filename is None |
| 653 return util.interpolate_bytes(b"%s = %s%s", what, util.b(value), ls) | 768 return util.interpolate_bytes(b"%s = %s%s", what, util.b(value), ls) |
| 654 assert filename is not None | 769 assert filename is not None |
| 655 if what == b"COMMENT": | 770 if what in (b"COMMENT", b"ERROR"): |
| 656 return util.interpolate_bytes( | 771 return util.interpolate_bytes( |
| 657 b"COMMENT (%s)%s", util.b(filename, "utf-8"), ls) | 772 b"%s (%s)%s", what, util.b(filename, "utf-8"), ls) |
| 658 if not isinstance(filename, bytes): | 773 if not isinstance(filename, bytes): |
| 659 filename = util.fsencode(filename) | 774 filename = util.fsencode(filename) |
| 660 if what == b"SIZE": | 775 if what == b"SIZE": |
| 661 return util.interpolate_bytes(b"SIZE (%s) = %d%s", filename, size, ls) | 776 return util.interpolate_bytes(b"SIZE (%s) = %d%s", filename, size, ls) |
| 662 if value is None: | 777 if value is None: |
| 681 | 796 |
| 682 Supports the iterator and context manager protocol. | 797 Supports the iterator and context manager protocol. |
| 683 | 798 |
| 684 """ | 799 """ |
| 685 | 800 |
| 686 PATTERN0 = re.compile(br"\A[ \t]*\r?\n\Z") # empty lines | 801 PATTERN0 = re.compile(br"\A[ \t]*\r?\n\Z") # empty lines |
| 687 PATTERN1 = re.compile(br"\A(VERSION|FSENCODING|FLAGS|TIMESTAMP|ISOTIMESTAMP|CRC32)[ \t]*=[ \t]*([^ \t]+)[ \t]*\r?\n\Z") # noqa: E501 line too long | 802 PATTERN1 = re.compile(br"\A(VERSION|FSENCODING|FLAGS|TIMESTAMP|ISOTIMESTAMP|CRC32)[ \t]*=[ \t]*([^ \t]+)[ \t]*\r?\n\Z") # noqa: E501 line too long |
| 688 PATTERN2 = re.compile(br"\A(ROOT|COMMENT)[ \t]*\((.*)\)[ \t]*\r?\n\Z") | 803 PATTERN2 = re.compile(br"\A(ROOT|COMMENT|ERROR)[ \t]*\((.*)\)[ \t]*\r?\n\Z") # noqa: E501 line too long |
| 689 PATTERN3 = re.compile(br"\ASIZE[ \t]*\((.*)\)[ \t]*=[ \t]*(\d+)[ \t]*\r?\n\Z") # noqa: E501 line too long | 804 PATTERN3 = re.compile(br"\ASIZE[ \t]*\((.*)\)[ \t]*=[ \t]*(\d+)[ \t]*\r?\n\Z") # noqa: E501 line too long |
| 690 PATTERN4 = re.compile(br"\A([A-Za-z0-9_-]+)[ \t]*\((.*)\)[ \t]*=[ \t]*([A-Za-z0-9=+/]+)(,(\d+))?[ \t]*\r?\n\Z") # noqa: E501 line too long | 805 PATTERN4 = re.compile(br"\A([A-Za-z0-9_-]+)[ \t]*\((.*)\)[ \t]*=[ \t]*([A-Za-z0-9=+/]+)(,(\d+))?[ \t]*\r?\n\Z") # noqa: E501 line too long |
| 691 | 806 |
| 692 def __init__(self, _fp, _filename, _own_fp): | 807 def __init__(self, _fp, _filename, _own_fp): |
| 693 self._fp = _fp | 808 self._fp = _fp |
| 694 self._own_fp = _own_fp | 809 self._own_fp = _own_fp |
| 695 self._filename = _filename | 810 self._filename = _filename |
| 810 return (util.n(mo.group(1)), util.n(mo.group(2))) | 925 return (util.n(mo.group(1)), util.n(mo.group(2))) |
| 811 else: | 926 else: |
| 812 mo = self.PATTERN2.search(line) | 927 mo = self.PATTERN2.search(line) |
| 813 if mo: | 928 if mo: |
| 814 self._update_crc(line) | 929 self._update_crc(line) |
| 815 if mo.group(1) == b"COMMENT": | 930 if mo.group(1) in (b"COMMENT", b"ERROR"): |
| 816 return ("COMMENT", util.u(mo.group(2), "utf-8")) | 931 return (util.u(mo.group(1)), util.u(mo.group(2), "utf-8")) |
| 817 elif mo.group(1) == b"ROOT": | 932 elif mo.group(1) == b"ROOT": |
| 818 return ("ROOT", mo.group(2)) | 933 return ("ROOT", mo.group(2)) |
| 819 assert False, line | 934 assert False, line |
| 820 else: | 935 else: |
| 821 mo = self.PATTERN3.search(line) | 936 mo = self.PATTERN3.search(line) |
