Python/apps/py-cutils: cutils/treesum.py comparison

comparison cutils/treesum.py @ 302:bf88323d6bf7

treesum: Implement --exclude/--include: do the filtering, document the filter patterns in the output, and handle them in the "info" command.

author	Franz Glasner <fzglas.hg@dom66.de>
date	Wed, 05 Mar 2025 10:07:44 +0100
parents	d246b631b85a
children	73d13be531b5

comparison

equal deleted inserted replaced

-:d246b631b85a
+:bf88323d6bf7
 out_cm = open(opts.output, "ab")
 else:
 out_cm = open(opts.output, "wb")
 out_cm = CRC32Output(out_cm)
+fnmatcher = fnmatch.FnMatcher.build_from_commandline_patterns(
+opts.fnmatch_filters)
 with out_cm as outfp:
 writer = TreesumWriter(outfp)
 for d in opts.directories:
 V1DirectoryTreesumGenerator(
 opts.algorithm, opts.mmap, opts.base64,
 opts.metadata_full_mode,
 opts.metadata_mtime,
 opts.size_only,
 opts.print_size,
 opts.utf8,
-minimal=opts.minimal).generate(
+minimal=opts.minimal,
+fnmatcher=fnmatcher).generate(
 writer, d, comment=opts.comment)
 class V1DirectoryTreesumGenerator(object):
 def __init__(self, algorithm, use_mmap, use_base64,
 follow_symlinks,
 with_generator,
 with_metadata_mode, with_metadata_full_mode,
 with_metadata_mtime, size_only, print_size, utf8_mode,
-minimal=None,):
+minimal=None,
+fnmatcher=None):
 super(V1DirectoryTreesumGenerator, self).__init__()
 self._algorithm = algorithm
 self._use_mmap = use_mmap
 self._use_base64 = use_base64
 self._follow_symlinks = follow_symlinks
 self._with_metadata_mtime = with_metadata_mtime
 self._size_only = size_only
 self._print_size = print_size
 self._utf8_mode = utf8_mode
 self._minimal = minimal
+self._fnmatcher = fnmatcher
 def generate(self, writer, root, comment=None):
 """
 :param writer: a writer (e.g. a `TreesumWriter`) for a *binary* file with a "write()" and a "flush()" method
 if comment:
 for line in comment:
 self._writer.write_comment(line)
+for action, kind, pattern in self._fnmatcher.definitions():
+self._writer.write_fnmatch_pattern(action, kind, pattern)
 if self._minimal is not None:
 self._writer.write_root(
 (walk.WalkDirEntry.alt_u8(self._minimal)
 if self._minimal else b""))
 else:
 self._writer.write_root(walk.WalkDirEntry.alt_u8(
 util.normalize_filename(root, True)))
 self._writer.flush()
 if not self._follow_symlinks.command_line and os.path.islink(root):
 linktgt = walk.WalkDirEntry.from_readlink(os.readlink(root))
 linkdgst = self._algorithm[0]()
 if top:
 logging.debug("Recursing into directory: %s/%r", root, top)
 else:
 logging.debug("Handling root directory: %s", root)
 path = os.path.join(root, *top) if top else root
+# Also determine the path to be used for directory filtering
+fpath = join_output_path(top, None) if top else ""
+if self._fnmatcher:
+if not self._fnmatcher.shall_visit(fpath):
+logging.debug("Skipping directory: %s", fpath)
+return (None, None, None, None)
 try:
 with walk.ScanDir(path) as dirscan:
 fsobjects = list(dirscan)
 except OSError as e:
 #
 opath = walk.WalkDirEntry.alt_u8(path)
 else:
 opath = walk.WalkDirEntry.alt_fs(path)
 self._writer.write_error(util.interpolate_bytes(
 b"`%s': %s", opath, errmsg))
+# Reuse the variable `opath': it now becomes the output path derived from `top'
 opath = join_output_path(top, None)
 if opath:
 if self._utf8_mode:
 opath = walk.WalkDirEntry.alt_u8(opath)
 else:
 effective_fso_name = fso.alt_fsname
 else:
 effective_fso_name = fso.fsname
 # Determine the path (mostly its prefix) that is to be printed
 opath = join_output_path(top, fso.name)
+# Determine the path to be used for filename filtering
+fpath = opath
+if self._fnmatcher:
+if not self._fnmatcher.shall_visit(fpath):
+logging.debug("Skipping: %s", fpath)
+continue
 if self._utf8_mode:
 opath = walk.WalkDirEntry.alt_u8(opath)
 else:
 opath = walk.WalkDirEntry.alt_fs(opath)
 if fso.is_special:
 # Get subdir data from recursing into it
 sub_dir_errno, sub_dir_algo, sub_dir_dgst, sub_dir_size = \
 self._generate(root, top + (fso.name, ))
+#
+# Check first whether the directory was selected to be
+# excluded
+#
+if sub_dir_errno is None:
+# Yes -- skipped
+continue
 if sub_dir_errno == 0:
 dir_size += sub_dir_size
 else:
 dir_tainted = True
 dir_dgst.update(util.interpolate_bytes(
 def write_fsencoding(self, encoding):
 self.write(b"FSENCODING = ")
 self.writeln(util.b(encoding))
+def write_fnmatch_pattern(self, action, kind, pattern):
+self.write(b"FNMATCH (")
+self.write(util.b(action))
+self.write(b": ")
+self.write(util.b(kind))
+self.write(b":")
+self.write(util.b(pattern, "utf-8"))
+self.writeln(b")")
 def write_flags(self, flags):
 self.write(b"FLAGS = ")
 if isinstance(flags, (str, bytes)):
 self.writeln(util.b(flags))
 else:
 """
 PATTERN0 = re.compile(br"\A[ \t]*\r?\n\Z")   # empty lines
 PATTERN1 = re.compile(br"\A(VERSION|FSENCODING|FLAGS|TIMESTAMP|ISOTIMESTAMP|CRC32)[ \t]*=[ \t]*([^ \t]+)[ \t]*\r?\n\Z")      # noqa: E501  line too long
-PATTERN2 = re.compile(br"\A(ROOT|COMMENT|ERROR|GENERATOR)[ \t]*\((.*)\)[ \t]*\r?\n\Z")                                       # noqa: E501  line too long
+PATTERN2 = re.compile(br"\A(ROOT|COMMENT|ERROR|GENERATOR|FNMATCH)[ \t]*\((.*)\)[ \t]*\r?\n\Z")                                       # noqa: E501  line too long
 PATTERN3 = re.compile(br"\ASIZE[ \t]*\((.*)\)([ \t]*=[ \t]*(\d+))?[ \t]*\r?\n\Z")                                               # noqa: E501  line too long
 PATTERN4 = re.compile(br"\A([A-Za-z0-9_-]+)[ \t]*\((.*)\)([ \t]*=[ \t]*([A-Za-z0-9=+/]+)?(,(\d+)?)?)?[ \t]*\r?\n\Z")              # noqa: E501  line too long
 def __init__(self, _fp, _filename, _own_fp):
 self._fp = _fp
 return (util.n(mo.group(1)), util.n(mo.group(2)))
 else:
 mo = self.PATTERN2.search(line)
 if mo:
 self._update_crc(line)
-if mo.group(1) in (b"COMMENT", b"ERROR", b"GENERATOR"):
+if mo.group(1) in (b"COMMENT", b"ERROR", b"GENERATOR",
+b"FNMATCH"):
 return (util.u(mo.group(1)), util.u(mo.group(2), "utf-8"))
 elif mo.group(1) == b"ROOT":
 return ("ROOT", mo.group(2))
 assert False, line
 else:
 with reader:
 root = generator = flags = fsencoding = algorithm = digest \
 = size = None
 errors = set()
 comments = []
+fnmatch_filters = []
 in_block = False
 block_no = 0
 for record in reader:
 if record[0] == "VERSION":
 assert record[1] == "1"
 root = record[1]
 elif record[0] == "COMMENT":
 comments.append(record[1])
 elif record[0] == "ERROR":
 errors.add(record[1])
+elif record[0] == "FNMATCH":
+fnmatch_filters.append(record[1])
 elif record[0] in ("TIMESTAMP", "ISOTIMESTAMP"):
 pass
 elif record[0] == "CRC32":
 pass
 # in_block = False
 digest = record[2]
 size = record[3]
 if not print_only_last_block:
 print_block_data(
 block_no,
-root, generator, fsencoding, flags, comments,
+root, generator, fsencoding, flags,
-errors, algorithm, digest, size)
+fnmatch_filters,
+comments, errors, algorithm, digest, size)
 root = generator = flags = fsencoding = algorithm \
 = digest = size = None
 errors = set()
 comments = []
 in_block = False
 if print_only_last_block:
 if not in_block:
 if digest is not None or size is not None:
 print_block_data(
 block_no,
-root, generator, fsencoding, flags, comments, errors,
+root, generator, fsencoding, flags, fnmatch_filters,
-algorithm, digest, size)
+comments, errors, algorithm, digest, size)
 else:
 logging.warning("missing block end")
-def print_block_data(block_no, tag, generator, fsencoding, flags, comments,
+def print_block_data(block_no, tag, generator, fsencoding, flags,
-errors, algorithm, digest, size):
+fnmatch_filters, comments, errors,
+algorithm, digest, size):
 digeststr = util.n(binascii.hexlify(digest)) if digest else "<no digest>"
 sizestr = str(size) if size is not None else "<no size>"
 print("BLOCK No %d:" % (block_no,))
 print("    Tag:", tag)
 print("    FS-Encoding:", fsencoding)
 if generator:
 print("    Generator:", generator)
 print("    Flags:", flags if flags else "<none>")
 if comments:
 print("    Comments:", comments)
+if fnmatch_filters:
+for f in fnmatch_filters:
+print("    FNMatch:", f)
 print("    Algorithm:", algorithm)
 if algorithm != "SIZE":
 print("    Digest:", digeststr)
 print("    Size:", sizestr)
 print("    Errors:", errors if errors else "<none>")

Mercurial > hgrepos > Python > apps > py-cutils

comparison cutils/treesum.py @ 302:bf88323d6bf7