Mercurial > hgrepos > Python > apps > py-cutils
comparison cutils/treesum.py @ 302:bf88323d6bf7
treesum: Implement --exclude/--include.
- Filtering
- Document in output
- Handle in the "info" command
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Wed, 05 Mar 2025 10:07:44 +0100 |
| parents | d246b631b85a |
| children | 73d13be531b5 |
Diff legend: equal (unchanged), deleted (removed in the new revision), inserted (added in the new revision), replaced (modified).
| 301:d246b631b85a | 302:bf88323d6bf7 |
|---|---|
| 500 out_cm = open(opts.output, "ab") | 500 out_cm = open(opts.output, "ab") |
| 501 else: | 501 else: |
| 502 out_cm = open(opts.output, "wb") | 502 out_cm = open(opts.output, "wb") |
| 503 out_cm = CRC32Output(out_cm) | 503 out_cm = CRC32Output(out_cm) |
| 504 | 504 |
| 505 fnmatcher = fnmatch.FnMatcher.build_from_commandline_patterns( | |
| 506 opts.fnmatch_filters) | |
| 507 | |
| 505 with out_cm as outfp: | 508 with out_cm as outfp: |
| 506 writer = TreesumWriter(outfp) | 509 writer = TreesumWriter(outfp) |
| 507 for d in opts.directories: | 510 for d in opts.directories: |
| 508 V1DirectoryTreesumGenerator( | 511 V1DirectoryTreesumGenerator( |
| 509 opts.algorithm, opts.mmap, opts.base64, | 512 opts.algorithm, opts.mmap, opts.base64, |
| 513 opts.metadata_full_mode, | 516 opts.metadata_full_mode, |
| 514 opts.metadata_mtime, | 517 opts.metadata_mtime, |
| 515 opts.size_only, | 518 opts.size_only, |
| 516 opts.print_size, | 519 opts.print_size, |
| 517 opts.utf8, | 520 opts.utf8, |
| 518 minimal=opts.minimal).generate( | 521 minimal=opts.minimal, |
| 522 fnmatcher=fnmatcher).generate( | |
| 519 writer, d, comment=opts.comment) | 523 writer, d, comment=opts.comment) |
| 520 | 524 |
| 521 | 525 |
| 522 class V1DirectoryTreesumGenerator(object): | 526 class V1DirectoryTreesumGenerator(object): |
| 523 | 527 |
| 524 def __init__(self, algorithm, use_mmap, use_base64, | 528 def __init__(self, algorithm, use_mmap, use_base64, |
| 525 follow_symlinks, | 529 follow_symlinks, |
| 526 with_generator, | 530 with_generator, |
| 527 with_metadata_mode, with_metadata_full_mode, | 531 with_metadata_mode, with_metadata_full_mode, |
| 528 with_metadata_mtime, size_only, print_size, utf8_mode, | 532 with_metadata_mtime, size_only, print_size, utf8_mode, |
| 529 minimal=None,): | 533 minimal=None, |
| 534 fnmatcher=None): | |
| 530 super(V1DirectoryTreesumGenerator, self).__init__() | 535 super(V1DirectoryTreesumGenerator, self).__init__() |
| 531 self._algorithm = algorithm | 536 self._algorithm = algorithm |
| 532 self._use_mmap = use_mmap | 537 self._use_mmap = use_mmap |
| 533 self._use_base64 = use_base64 | 538 self._use_base64 = use_base64 |
| 534 self._follow_symlinks = follow_symlinks | 539 self._follow_symlinks = follow_symlinks |
| 538 self._with_metadata_mtime = with_metadata_mtime | 543 self._with_metadata_mtime = with_metadata_mtime |
| 539 self._size_only = size_only | 544 self._size_only = size_only |
| 540 self._print_size = print_size | 545 self._print_size = print_size |
| 541 self._utf8_mode = utf8_mode | 546 self._utf8_mode = utf8_mode |
| 542 self._minimal = minimal | 547 self._minimal = minimal |
| 548 self._fnmatcher = fnmatcher | |
| 543 | 549 |
| 544 def generate(self, writer, root, comment=None): | 550 def generate(self, writer, root, comment=None): |
| 545 """ | 551 """ |
| 546 | 552 |
| 547 :param outfp: a *binary* file with a "write()" and a "flush()" method | 553 :param outfp: a *binary* file with a "write()" and a "flush()" method |
| 605 | 611 |
| 606 if comment: | 612 if comment: |
| 607 for line in comment: | 613 for line in comment: |
| 608 self._writer.write_comment(line) | 614 self._writer.write_comment(line) |
| 609 | 615 |
| 616 for action, kind, pattern in self._fnmatcher.definitions(): | |
| 617 self._writer.write_fnmatch_pattern(action, kind, pattern) | |
| 618 | |
| 610 if self._minimal is not None: | 619 if self._minimal is not None: |
| 611 self._writer.write_root( | 620 self._writer.write_root( |
| 612 (walk.WalkDirEntry.alt_u8(self._minimal) | 621 (walk.WalkDirEntry.alt_u8(self._minimal) |
| 613 if self._minimal else b"")) | 622 if self._minimal else b"")) |
| 614 else: | 623 else: |
| 615 self._writer.write_root(walk.WalkDirEntry.alt_u8( | 624 self._writer.write_root(walk.WalkDirEntry.alt_u8( |
| 616 util.normalize_filename(root, True))) | 625 util.normalize_filename(root, True))) |
| 626 | |
| 617 self._writer.flush() | 627 self._writer.flush() |
| 618 | 628 |
| 619 if not self._follow_symlinks.command_line and os.path.islink(root): | 629 if not self._follow_symlinks.command_line and os.path.islink(root): |
| 620 linktgt = walk.WalkDirEntry.from_readlink(os.readlink(root)) | 630 linktgt = walk.WalkDirEntry.from_readlink(os.readlink(root)) |
| 621 linkdgst = self._algorithm[0]() | 631 linkdgst = self._algorithm[0]() |
| 646 if top: | 656 if top: |
| 647 logging.debug("Recursing into directory: %s/%r", root, top) | 657 logging.debug("Recursing into directory: %s/%r", root, top) |
| 648 else: | 658 else: |
| 649 logging.debug("Handling root directory: %s", root) | 659 logging.debug("Handling root directory: %s", root) |
| 650 path = os.path.join(root, *top) if top else root | 660 path = os.path.join(root, *top) if top else root |
| 661 # Determine also the path to be used for directory filtering | |
| 662 fpath = join_output_path(top, None) if top else "" | |
| 663 if self._fnmatcher: | |
| 664 if not self._fnmatcher.shall_visit(fpath): | |
| 665 logging.debug("Skipping directory: %s", fpath) | |
| 666 return (None, None, None, None) | |
| 651 try: | 667 try: |
| 652 with walk.ScanDir(path) as dirscan: | 668 with walk.ScanDir(path) as dirscan: |
| 653 fsobjects = list(dirscan) | 669 fsobjects = list(dirscan) |
| 654 except OSError as e: | 670 except OSError as e: |
| 655 # | 671 # |
| 673 opath = walk.WalkDirEntry.alt_u8(path) | 689 opath = walk.WalkDirEntry.alt_u8(path) |
| 674 else: | 690 else: |
| 675 opath = walk.WalkDirEntry.alt_fs(path) | 691 opath = walk.WalkDirEntry.alt_fs(path) |
| 676 self._writer.write_error(util.interpolate_bytes( | 692 self._writer.write_error(util.interpolate_bytes( |
| 677 b"`%s': %s", opath, errmsg)) | 693 b"`%s': %s", opath, errmsg)) |
| 694 # Reuse from top | |
| 678 opath = join_output_path(top, None) | 695 opath = join_output_path(top, None) |
| 679 if opath: | 696 if opath: |
| 680 if self._utf8_mode: | 697 if self._utf8_mode: |
| 681 opath = walk.WalkDirEntry.alt_u8(opath) | 698 opath = walk.WalkDirEntry.alt_u8(opath) |
| 682 else: | 699 else: |
| 708 effective_fso_name = fso.alt_fsname | 725 effective_fso_name = fso.alt_fsname |
| 709 else: | 726 else: |
| 710 effective_fso_name = fso.fsname | 727 effective_fso_name = fso.fsname |
| 711 # Determine the path (mostly its prefix) that is to be printed | 728 # Determine the path (mostly its prefix) that is to be printed |
| 712 opath = join_output_path(top, fso.name) | 729 opath = join_output_path(top, fso.name) |
| 730 # Determine the path to be used for filename filtering | |
| 731 fpath = opath | |
| 732 if self._fnmatcher: | |
| 733 if not self._fnmatcher.shall_visit(fpath): | |
| 734 logging.debug("Skipping: %s", fpath) | |
| 735 continue | |
| 713 if self._utf8_mode: | 736 if self._utf8_mode: |
| 714 opath = walk.WalkDirEntry.alt_u8(opath) | 737 opath = walk.WalkDirEntry.alt_u8(opath) |
| 715 else: | 738 else: |
| 716 opath = walk.WalkDirEntry.alt_fs(opath) | 739 opath = walk.WalkDirEntry.alt_fs(opath) |
| 717 if fso.is_special: | 740 if fso.is_special: |
| 865 | 888 |
| 866 # Get subdir data from recursing into it | 889 # Get subdir data from recursing into it |
| 867 sub_dir_errno, sub_dir_algo, sub_dir_dgst, sub_dir_size = \ | 890 sub_dir_errno, sub_dir_algo, sub_dir_dgst, sub_dir_size = \ |
| 868 self._generate(root, top + (fso.name, )) | 891 self._generate(root, top + (fso.name, )) |
| 869 | 892 |
| 893 # | |
| 894 # Check first whether the directory was selected to be | |
| 895 # excluded | |
| 896 # | |
| 897 if sub_dir_errno is None: | |
| 898 # Yes -- skipped | |
| 899 continue | |
| 870 if sub_dir_errno == 0: | 900 if sub_dir_errno == 0: |
| 871 dir_size += sub_dir_size | 901 dir_size += sub_dir_size |
| 872 else: | 902 else: |
| 873 dir_tainted = True | 903 dir_tainted = True |
| 874 dir_dgst.update(util.interpolate_bytes( | 904 dir_dgst.update(util.interpolate_bytes( |
| 1189 | 1219 |
| 1190 def write_fsencoding(self, encoding): | 1220 def write_fsencoding(self, encoding): |
| 1191 self.write(b"FSENCODING = ") | 1221 self.write(b"FSENCODING = ") |
| 1192 self.writeln(util.b(encoding)) | 1222 self.writeln(util.b(encoding)) |
| 1193 | 1223 |
| 1224 def write_fnmatch_pattern(self, action, kind, pattern): | |
| 1225 self.write(b"FNMATCH (") | |
| 1226 self.write(util.b(action)) | |
| 1227 self.write(b": ") | |
| 1228 self.write(util.b(kind)) | |
| 1229 self.write(b":") | |
| 1230 self.write(util.b(pattern, "utf-8")) | |
| 1231 self.writeln(b")") | |
| 1232 | |
| 1194 def write_flags(self, flags): | 1233 def write_flags(self, flags): |
| 1195 self.write(b"FLAGS = ") | 1234 self.write(b"FLAGS = ") |
| 1196 if isinstance(flags, (str, bytes)): | 1235 if isinstance(flags, (str, bytes)): |
| 1197 self.writeln(util.b(flags)) | 1236 self.writeln(util.b(flags)) |
| 1198 else: | 1237 else: |
| 1281 | 1320 |
| 1282 """ | 1321 """ |
| 1283 | 1322 |
| 1284 PATTERN0 = re.compile(br"\A[ \t]*\r?\n\Z") # empty lines | 1323 PATTERN0 = re.compile(br"\A[ \t]*\r?\n\Z") # empty lines |
| 1285 PATTERN1 = re.compile(br"\A(VERSION|FSENCODING|FLAGS|TIMESTAMP|ISOTIMESTAMP|CRC32)[ \t]*=[ \t]*([^ \t]+)[ \t]*\r?\n\Z") # noqa: E501 line too long | 1324 PATTERN1 = re.compile(br"\A(VERSION|FSENCODING|FLAGS|TIMESTAMP|ISOTIMESTAMP|CRC32)[ \t]*=[ \t]*([^ \t]+)[ \t]*\r?\n\Z") # noqa: E501 line too long |
| 1286 PATTERN2 = re.compile(br"\A(ROOT|COMMENT|ERROR|GENERATOR)[ \t]*\((.*)\)[ \t]*\r?\n\Z") # noqa: E501 line too long | 1325 PATTERN2 = re.compile(br"\A(ROOT|COMMENT|ERROR|GENERATOR|FNMATCH)[ \t]*\((.*)\)[ \t]*\r?\n\Z") # noqa: E501 line too long |
| 1287 PATTERN3 = re.compile(br"\ASIZE[ \t]*\((.*)\)([ \t]*=[ \t]*(\d+))?[ \t]*\r?\n\Z") # noqa: E501 line too long | 1326 PATTERN3 = re.compile(br"\ASIZE[ \t]*\((.*)\)([ \t]*=[ \t]*(\d+))?[ \t]*\r?\n\Z") # noqa: E501 line too long |
| 1288 PATTERN4 = re.compile(br"\A([A-Za-z0-9_-]+)[ \t]*\((.*)\)([ \t]*=[ \t]*([A-Za-z0-9=+/]+)?(,(\d+)?)?)?[ \t]*\r?\n\Z") # noqa: E501 line too long | 1327 PATTERN4 = re.compile(br"\A([A-Za-z0-9_-]+)[ \t]*\((.*)\)([ \t]*=[ \t]*([A-Za-z0-9=+/]+)?(,(\d+)?)?)?[ \t]*\r?\n\Z") # noqa: E501 line too long |
| 1289 | 1328 |
| 1290 def __init__(self, _fp, _filename, _own_fp): | 1329 def __init__(self, _fp, _filename, _own_fp): |
| 1291 self._fp = _fp | 1330 self._fp = _fp |
| 1408 return (util.n(mo.group(1)), util.n(mo.group(2))) | 1447 return (util.n(mo.group(1)), util.n(mo.group(2))) |
| 1409 else: | 1448 else: |
| 1410 mo = self.PATTERN2.search(line) | 1449 mo = self.PATTERN2.search(line) |
| 1411 if mo: | 1450 if mo: |
| 1412 self._update_crc(line) | 1451 self._update_crc(line) |
| 1413 if mo.group(1) in (b"COMMENT", b"ERROR", b"GENERATOR"): | 1452 if mo.group(1) in (b"COMMENT", b"ERROR", b"GENERATOR", |
| 1453 b"FNMATCH"): | |
| 1414 return (util.u(mo.group(1)), util.u(mo.group(2), "utf-8")) | 1454 return (util.u(mo.group(1)), util.u(mo.group(2), "utf-8")) |
| 1415 elif mo.group(1) == b"ROOT": | 1455 elif mo.group(1) == b"ROOT": |
| 1416 return ("ROOT", mo.group(2)) | 1456 return ("ROOT", mo.group(2)) |
| 1417 assert False, line | 1457 assert False, line |
| 1418 else: | 1458 else: |
| 1495 with reader: | 1535 with reader: |
| 1496 root = generator = flags = fsencoding = algorithm = digest \ | 1536 root = generator = flags = fsencoding = algorithm = digest \ |
| 1497 = size = None | 1537 = size = None |
| 1498 errors = set() | 1538 errors = set() |
| 1499 comments = [] | 1539 comments = [] |
| 1540 fnmatch_filters = [] | |
| 1500 in_block = False | 1541 in_block = False |
| 1501 block_no = 0 | 1542 block_no = 0 |
| 1502 for record in reader: | 1543 for record in reader: |
| 1503 if record[0] == "VERSION": | 1544 if record[0] == "VERSION": |
| 1504 assert record[1] == "1" | 1545 assert record[1] == "1" |
| 1517 root = record[1] | 1558 root = record[1] |
| 1518 elif record[0] == "COMMENT": | 1559 elif record[0] == "COMMENT": |
| 1519 comments.append(record[1]) | 1560 comments.append(record[1]) |
| 1520 elif record[0] == "ERROR": | 1561 elif record[0] == "ERROR": |
| 1521 errors.add(record[1]) | 1562 errors.add(record[1]) |
| 1563 elif record[0] == "FNMATCH": | |
| 1564 fnmatch_filters.append(record[1]) | |
| 1522 elif record[0] in ("TIMESTAMP", "ISOTIMESTAMP"): | 1565 elif record[0] in ("TIMESTAMP", "ISOTIMESTAMP"): |
| 1523 pass | 1566 pass |
| 1524 elif record[0] == "CRC32": | 1567 elif record[0] == "CRC32": |
| 1525 pass | 1568 pass |
| 1526 # in_block = False | 1569 # in_block = False |
| 1538 digest = record[2] | 1581 digest = record[2] |
| 1539 size = record[3] | 1582 size = record[3] |
| 1540 if not print_only_last_block: | 1583 if not print_only_last_block: |
| 1541 print_block_data( | 1584 print_block_data( |
| 1542 block_no, | 1585 block_no, |
| 1543 root, generator, fsencoding, flags, comments, | 1586 root, generator, fsencoding, flags, |
| 1544 errors, algorithm, digest, size) | 1587 fnmatch_filters, |
| 1588 comments, errors, algorithm, digest, size) | |
| 1545 root = generator = flags = fsencoding = algorithm \ | 1589 root = generator = flags = fsencoding = algorithm \ |
| 1546 = digest = size = None | 1590 = digest = size = None |
| 1547 errors = set() | 1591 errors = set() |
| 1548 comments = [] | 1592 comments = [] |
| 1549 in_block = False | 1593 in_block = False |
| 1550 if print_only_last_block: | 1594 if print_only_last_block: |
| 1551 if not in_block: | 1595 if not in_block: |
| 1552 if digest is not None or size is not None: | 1596 if digest is not None or size is not None: |
| 1553 print_block_data( | 1597 print_block_data( |
| 1554 block_no, | 1598 block_no, |
| 1555 root, generator, fsencoding, flags, comments, errors, | 1599 root, generator, fsencoding, flags, fnmatch_filters, |
| 1556 algorithm, digest, size) | 1600 comments, errors, algorithm, digest, size) |
| 1557 else: | 1601 else: |
| 1558 logging.warning("missing block end") | 1602 logging.warning("missing block end") |
| 1559 | 1603 |
| 1560 | 1604 |
| 1561 def print_block_data(block_no, tag, generator, fsencoding, flags, comments, | 1605 def print_block_data(block_no, tag, generator, fsencoding, flags, |
| 1562 errors, algorithm, digest, size): | 1606 fnmatch_filters, comments, errors, |
| 1607 algorithm, digest, size): | |
| 1563 digeststr = util.n(binascii.hexlify(digest)) if digest else "<no digest>" | 1608 digeststr = util.n(binascii.hexlify(digest)) if digest else "<no digest>" |
| 1564 sizestr = str(size) if size is not None else "<no size>" | 1609 sizestr = str(size) if size is not None else "<no size>" |
| 1565 print("BLOCK No %d:" % (block_no,)) | 1610 print("BLOCK No %d:" % (block_no,)) |
| 1566 print(" Tag:", tag) | 1611 print(" Tag:", tag) |
| 1567 print(" FS-Encoding:", fsencoding) | 1612 print(" FS-Encoding:", fsencoding) |
| 1568 if generator: | 1613 if generator: |
| 1569 print(" Generator:", generator) | 1614 print(" Generator:", generator) |
| 1570 print(" Flags:", flags if flags else "<none>") | 1615 print(" Flags:", flags if flags else "<none>") |
| 1571 if comments: | 1616 if comments: |
| 1572 print(" Comments:", comments) | 1617 print(" Comments:", comments) |
| 1618 if fnmatch_filters: | |
| 1619 for f in fnmatch_filters: | |
| 1620 print(" FNMatch:", f) | |
| 1573 print(" Algorithm:", algorithm) | 1621 print(" Algorithm:", algorithm) |
| 1574 if algorithm != "SIZE": | 1622 if algorithm != "SIZE": |
| 1575 print(" Digest:", digeststr) | 1623 print(" Digest:", digeststr) |
| 1576 print(" Size:", sizestr) | 1624 print(" Size:", sizestr) |
| 1577 print(" Errors:", errors if errors else "<none>") | 1625 print(" Errors:", errors if errors else "<none>") |
