comparison cutils/treesum.py @ 302:bf88323d6bf7

treesum: Implement --exclude/--include. - Filtering - Document in output - Handle in the "info" command
author Franz Glasner <fzglas.hg@dom66.de>
date Wed, 05 Mar 2025 10:07:44 +0100
parents d246b631b85a
children 73d13be531b5
comparison
equal deleted inserted replaced
301:d246b631b85a 302:bf88323d6bf7
500 out_cm = open(opts.output, "ab") 500 out_cm = open(opts.output, "ab")
501 else: 501 else:
502 out_cm = open(opts.output, "wb") 502 out_cm = open(opts.output, "wb")
503 out_cm = CRC32Output(out_cm) 503 out_cm = CRC32Output(out_cm)
504 504
505 fnmatcher = fnmatch.FnMatcher.build_from_commandline_patterns(
506 opts.fnmatch_filters)
507
505 with out_cm as outfp: 508 with out_cm as outfp:
506 writer = TreesumWriter(outfp) 509 writer = TreesumWriter(outfp)
507 for d in opts.directories: 510 for d in opts.directories:
508 V1DirectoryTreesumGenerator( 511 V1DirectoryTreesumGenerator(
509 opts.algorithm, opts.mmap, opts.base64, 512 opts.algorithm, opts.mmap, opts.base64,
513 opts.metadata_full_mode, 516 opts.metadata_full_mode,
514 opts.metadata_mtime, 517 opts.metadata_mtime,
515 opts.size_only, 518 opts.size_only,
516 opts.print_size, 519 opts.print_size,
517 opts.utf8, 520 opts.utf8,
518 minimal=opts.minimal).generate( 521 minimal=opts.minimal,
522 fnmatcher=fnmatcher).generate(
519 writer, d, comment=opts.comment) 523 writer, d, comment=opts.comment)
520 524
521 525
522 class V1DirectoryTreesumGenerator(object): 526 class V1DirectoryTreesumGenerator(object):
523 527
524 def __init__(self, algorithm, use_mmap, use_base64, 528 def __init__(self, algorithm, use_mmap, use_base64,
525 follow_symlinks, 529 follow_symlinks,
526 with_generator, 530 with_generator,
527 with_metadata_mode, with_metadata_full_mode, 531 with_metadata_mode, with_metadata_full_mode,
528 with_metadata_mtime, size_only, print_size, utf8_mode, 532 with_metadata_mtime, size_only, print_size, utf8_mode,
529 minimal=None,): 533 minimal=None,
534 fnmatcher=None):
530 super(V1DirectoryTreesumGenerator, self).__init__() 535 super(V1DirectoryTreesumGenerator, self).__init__()
531 self._algorithm = algorithm 536 self._algorithm = algorithm
532 self._use_mmap = use_mmap 537 self._use_mmap = use_mmap
533 self._use_base64 = use_base64 538 self._use_base64 = use_base64
534 self._follow_symlinks = follow_symlinks 539 self._follow_symlinks = follow_symlinks
538 self._with_metadata_mtime = with_metadata_mtime 543 self._with_metadata_mtime = with_metadata_mtime
539 self._size_only = size_only 544 self._size_only = size_only
540 self._print_size = print_size 545 self._print_size = print_size
541 self._utf8_mode = utf8_mode 546 self._utf8_mode = utf8_mode
542 self._minimal = minimal 547 self._minimal = minimal
548 self._fnmatcher = fnmatcher
543 549
544 def generate(self, writer, root, comment=None): 550 def generate(self, writer, root, comment=None):
545 """ 551 """
546 552
547 :param outfp: a *binary* file with a "write()" and a "flush()" method 553 :param outfp: a *binary* file with a "write()" and a "flush()" method
605 611
606 if comment: 612 if comment:
607 for line in comment: 613 for line in comment:
608 self._writer.write_comment(line) 614 self._writer.write_comment(line)
609 615
616 for action, kind, pattern in self._fnmatcher.definitions():
617 self._writer.write_fnmatch_pattern(action, kind, pattern)
618
610 if self._minimal is not None: 619 if self._minimal is not None:
611 self._writer.write_root( 620 self._writer.write_root(
612 (walk.WalkDirEntry.alt_u8(self._minimal) 621 (walk.WalkDirEntry.alt_u8(self._minimal)
613 if self._minimal else b"")) 622 if self._minimal else b""))
614 else: 623 else:
615 self._writer.write_root(walk.WalkDirEntry.alt_u8( 624 self._writer.write_root(walk.WalkDirEntry.alt_u8(
616 util.normalize_filename(root, True))) 625 util.normalize_filename(root, True)))
626
617 self._writer.flush() 627 self._writer.flush()
618 628
619 if not self._follow_symlinks.command_line and os.path.islink(root): 629 if not self._follow_symlinks.command_line and os.path.islink(root):
620 linktgt = walk.WalkDirEntry.from_readlink(os.readlink(root)) 630 linktgt = walk.WalkDirEntry.from_readlink(os.readlink(root))
621 linkdgst = self._algorithm[0]() 631 linkdgst = self._algorithm[0]()
646 if top: 656 if top:
647 logging.debug("Recursing into directory: %s/%r", root, top) 657 logging.debug("Recursing into directory: %s/%r", root, top)
648 else: 658 else:
649 logging.debug("Handling root directory: %s", root) 659 logging.debug("Handling root directory: %s", root)
650 path = os.path.join(root, *top) if top else root 660 path = os.path.join(root, *top) if top else root
661 # Determine also the path to be used for directory filtering
662 fpath = join_output_path(top, None) if top else ""
663 if self._fnmatcher:
664 if not self._fnmatcher.shall_visit(fpath):
665 logging.debug("Skipping directory: %s", fpath)
666 return (None, None, None, None)
651 try: 667 try:
652 with walk.ScanDir(path) as dirscan: 668 with walk.ScanDir(path) as dirscan:
653 fsobjects = list(dirscan) 669 fsobjects = list(dirscan)
654 except OSError as e: 670 except OSError as e:
655 # 671 #
673 opath = walk.WalkDirEntry.alt_u8(path) 689 opath = walk.WalkDirEntry.alt_u8(path)
674 else: 690 else:
675 opath = walk.WalkDirEntry.alt_fs(path) 691 opath = walk.WalkDirEntry.alt_fs(path)
676 self._writer.write_error(util.interpolate_bytes( 692 self._writer.write_error(util.interpolate_bytes(
677 b"`%s': %s", opath, errmsg)) 693 b"`%s': %s", opath, errmsg))
694 # Reuse from top
678 opath = join_output_path(top, None) 695 opath = join_output_path(top, None)
679 if opath: 696 if opath:
680 if self._utf8_mode: 697 if self._utf8_mode:
681 opath = walk.WalkDirEntry.alt_u8(opath) 698 opath = walk.WalkDirEntry.alt_u8(opath)
682 else: 699 else:
708 effective_fso_name = fso.alt_fsname 725 effective_fso_name = fso.alt_fsname
709 else: 726 else:
710 effective_fso_name = fso.fsname 727 effective_fso_name = fso.fsname
711 # Determine the path (mostly its prefix) that is to be printed 728 # Determine the path (mostly its prefix) that is to be printed
712 opath = join_output_path(top, fso.name) 729 opath = join_output_path(top, fso.name)
730 # Determine the path to be used for filename filtering
731 fpath = opath
732 if self._fnmatcher:
733 if not self._fnmatcher.shall_visit(fpath):
734 logging.debug("Skipping: %s", fpath)
735 continue
713 if self._utf8_mode: 736 if self._utf8_mode:
714 opath = walk.WalkDirEntry.alt_u8(opath) 737 opath = walk.WalkDirEntry.alt_u8(opath)
715 else: 738 else:
716 opath = walk.WalkDirEntry.alt_fs(opath) 739 opath = walk.WalkDirEntry.alt_fs(opath)
717 if fso.is_special: 740 if fso.is_special:
865 888
866 # Get subdir data from recursing into it 889 # Get subdir data from recursing into it
867 sub_dir_errno, sub_dir_algo, sub_dir_dgst, sub_dir_size = \ 890 sub_dir_errno, sub_dir_algo, sub_dir_dgst, sub_dir_size = \
868 self._generate(root, top + (fso.name, )) 891 self._generate(root, top + (fso.name, ))
869 892
893 #
894 # Check first whether the directory was selected to be
895 # excluded
896 #
897 if sub_dir_errno is None:
898 # Yes -- skipped
899 continue
870 if sub_dir_errno == 0: 900 if sub_dir_errno == 0:
871 dir_size += sub_dir_size 901 dir_size += sub_dir_size
872 else: 902 else:
873 dir_tainted = True 903 dir_tainted = True
874 dir_dgst.update(util.interpolate_bytes( 904 dir_dgst.update(util.interpolate_bytes(
1189 1219
1190 def write_fsencoding(self, encoding): 1220 def write_fsencoding(self, encoding):
1191 self.write(b"FSENCODING = ") 1221 self.write(b"FSENCODING = ")
1192 self.writeln(util.b(encoding)) 1222 self.writeln(util.b(encoding))
1193 1223
def write_fnmatch_pattern(self, action, kind, pattern):
    """Emit one ``FNMATCH`` record describing a single filter definition.

    The record is written piecewise in the layout
    ``FNMATCH (<action>: <kind>:<pattern>)`` and is finished by
    handing the closing parenthesis to :meth:`writeln`.

    :param action: the filter action; converted with ``util.b()``
    :param kind: the pattern kind; converted with ``util.b()``
    :param pattern: the pattern text; converted with ``util.b()`` using
                    an explicit "utf-8" encoding

    """
    emit = self.write
    emit(b"FNMATCH (")
    emit(util.b(action))
    # Note the asymmetric separators: ": " follows the action while a
    # bare ":" follows the kind.
    emit(b": ")
    emit(util.b(kind))
    emit(b":")
    # Only the pattern is converted with an explicit encoding.
    emit(util.b(pattern, "utf-8"))
    self.writeln(b")")
1232
1194 def write_flags(self, flags): 1233 def write_flags(self, flags):
1195 self.write(b"FLAGS = ") 1234 self.write(b"FLAGS = ")
1196 if isinstance(flags, (str, bytes)): 1235 if isinstance(flags, (str, bytes)):
1197 self.writeln(util.b(flags)) 1236 self.writeln(util.b(flags))
1198 else: 1237 else:
1281 1320
1282 """ 1321 """
1283 1322
1284 PATTERN0 = re.compile(br"\A[ \t]*\r?\n\Z") # empty lines 1323 PATTERN0 = re.compile(br"\A[ \t]*\r?\n\Z") # empty lines
1285 PATTERN1 = re.compile(br"\A(VERSION|FSENCODING|FLAGS|TIMESTAMP|ISOTIMESTAMP|CRC32)[ \t]*=[ \t]*([^ \t]+)[ \t]*\r?\n\Z") # noqa: E501 line too long 1324 PATTERN1 = re.compile(br"\A(VERSION|FSENCODING|FLAGS|TIMESTAMP|ISOTIMESTAMP|CRC32)[ \t]*=[ \t]*([^ \t]+)[ \t]*\r?\n\Z") # noqa: E501 line too long
1286 PATTERN2 = re.compile(br"\A(ROOT|COMMENT|ERROR|GENERATOR)[ \t]*\((.*)\)[ \t]*\r?\n\Z") # noqa: E501 line too long 1325 PATTERN2 = re.compile(br"\A(ROOT|COMMENT|ERROR|GENERATOR|FNMATCH)[ \t]*\((.*)\)[ \t]*\r?\n\Z") # noqa: E501 line too long
1287 PATTERN3 = re.compile(br"\ASIZE[ \t]*\((.*)\)([ \t]*=[ \t]*(\d+))?[ \t]*\r?\n\Z") # noqa: E501 line too long 1326 PATTERN3 = re.compile(br"\ASIZE[ \t]*\((.*)\)([ \t]*=[ \t]*(\d+))?[ \t]*\r?\n\Z") # noqa: E501 line too long
1288 PATTERN4 = re.compile(br"\A([A-Za-z0-9_-]+)[ \t]*\((.*)\)([ \t]*=[ \t]*([A-Za-z0-9=+/]+)?(,(\d+)?)?)?[ \t]*\r?\n\Z") # noqa: E501 line too long 1327 PATTERN4 = re.compile(br"\A([A-Za-z0-9_-]+)[ \t]*\((.*)\)([ \t]*=[ \t]*([A-Za-z0-9=+/]+)?(,(\d+)?)?)?[ \t]*\r?\n\Z") # noqa: E501 line too long
1289 1328
1290 def __init__(self, _fp, _filename, _own_fp): 1329 def __init__(self, _fp, _filename, _own_fp):
1291 self._fp = _fp 1330 self._fp = _fp
1408 return (util.n(mo.group(1)), util.n(mo.group(2))) 1447 return (util.n(mo.group(1)), util.n(mo.group(2)))
1409 else: 1448 else:
1410 mo = self.PATTERN2.search(line) 1449 mo = self.PATTERN2.search(line)
1411 if mo: 1450 if mo:
1412 self._update_crc(line) 1451 self._update_crc(line)
1413 if mo.group(1) in (b"COMMENT", b"ERROR", b"GENERATOR"): 1452 if mo.group(1) in (b"COMMENT", b"ERROR", b"GENERATOR",
1453 b"FNMATCH"):
1414 return (util.u(mo.group(1)), util.u(mo.group(2), "utf-8")) 1454 return (util.u(mo.group(1)), util.u(mo.group(2), "utf-8"))
1415 elif mo.group(1) == b"ROOT": 1455 elif mo.group(1) == b"ROOT":
1416 return ("ROOT", mo.group(2)) 1456 return ("ROOT", mo.group(2))
1417 assert False, line 1457 assert False, line
1418 else: 1458 else:
1495 with reader: 1535 with reader:
1496 root = generator = flags = fsencoding = algorithm = digest \ 1536 root = generator = flags = fsencoding = algorithm = digest \
1497 = size = None 1537 = size = None
1498 errors = set() 1538 errors = set()
1499 comments = [] 1539 comments = []
1540 fnmatch_filters = []
1500 in_block = False 1541 in_block = False
1501 block_no = 0 1542 block_no = 0
1502 for record in reader: 1543 for record in reader:
1503 if record[0] == "VERSION": 1544 if record[0] == "VERSION":
1504 assert record[1] == "1" 1545 assert record[1] == "1"
1517 root = record[1] 1558 root = record[1]
1518 elif record[0] == "COMMENT": 1559 elif record[0] == "COMMENT":
1519 comments.append(record[1]) 1560 comments.append(record[1])
1520 elif record[0] == "ERROR": 1561 elif record[0] == "ERROR":
1521 errors.add(record[1]) 1562 errors.add(record[1])
1563 elif record[0] == "FNMATCH":
1564 fnmatch_filters.append(record[1])
1522 elif record[0] in ("TIMESTAMP", "ISOTIMESTAMP"): 1565 elif record[0] in ("TIMESTAMP", "ISOTIMESTAMP"):
1523 pass 1566 pass
1524 elif record[0] == "CRC32": 1567 elif record[0] == "CRC32":
1525 pass 1568 pass
1526 # in_block = False 1569 # in_block = False
1538 digest = record[2] 1581 digest = record[2]
1539 size = record[3] 1582 size = record[3]
1540 if not print_only_last_block: 1583 if not print_only_last_block:
1541 print_block_data( 1584 print_block_data(
1542 block_no, 1585 block_no,
1543 root, generator, fsencoding, flags, comments, 1586 root, generator, fsencoding, flags,
1544 errors, algorithm, digest, size) 1587 fnmatch_filters,
1588 comments, errors, algorithm, digest, size)
1545 root = generator = flags = fsencoding = algorithm \ 1589 root = generator = flags = fsencoding = algorithm \
1546 = digest = size = None 1590 = digest = size = None
1547 errors = set() 1591 errors = set()
1548 comments = [] 1592 comments = []
1549 in_block = False 1593 in_block = False
1550 if print_only_last_block: 1594 if print_only_last_block:
1551 if not in_block: 1595 if not in_block:
1552 if digest is not None or size is not None: 1596 if digest is not None or size is not None:
1553 print_block_data( 1597 print_block_data(
1554 block_no, 1598 block_no,
1555 root, generator, fsencoding, flags, comments, errors, 1599 root, generator, fsencoding, flags, fnmatch_filters,
1556 algorithm, digest, size) 1600 comments, errors, algorithm, digest, size)
1557 else: 1601 else:
1558 logging.warning("missing block end") 1602 logging.warning("missing block end")
1559 1603
1560 1604
1561 def print_block_data(block_no, tag, generator, fsencoding, flags, comments, 1605 def print_block_data(block_no, tag, generator, fsencoding, flags,
1562 errors, algorithm, digest, size): 1606 fnmatch_filters, comments, errors,
1607 algorithm, digest, size):
1563 digeststr = util.n(binascii.hexlify(digest)) if digest else "<no digest>" 1608 digeststr = util.n(binascii.hexlify(digest)) if digest else "<no digest>"
1564 sizestr = str(size) if size is not None else "<no size>" 1609 sizestr = str(size) if size is not None else "<no size>"
1565 print("BLOCK No %d:" % (block_no,)) 1610 print("BLOCK No %d:" % (block_no,))
1566 print(" Tag:", tag) 1611 print(" Tag:", tag)
1567 print(" FS-Encoding:", fsencoding) 1612 print(" FS-Encoding:", fsencoding)
1568 if generator: 1613 if generator:
1569 print(" Generator:", generator) 1614 print(" Generator:", generator)
1570 print(" Flags:", flags if flags else "<none>") 1615 print(" Flags:", flags if flags else "<none>")
1571 if comments: 1616 if comments:
1572 print(" Comments:", comments) 1617 print(" Comments:", comments)
1618 if fnmatch_filters:
1619 for f in fnmatch_filters:
1620 print(" FNMatch:", f)
1573 print(" Algorithm:", algorithm) 1621 print(" Algorithm:", algorithm)
1574 if algorithm != "SIZE": 1622 if algorithm != "SIZE":
1575 print(" Digest:", digeststr) 1623 print(" Digest:", digeststr)
1576 print(" Size:", sizestr) 1624 print(" Size:", sizestr)
1577 print(" Errors:", errors if errors else "<none>") 1625 print(" Errors:", errors if errors else "<none>")