Mercurial > hgrepos > Python > apps > py-cutils
view cutils/util/fnmatch.py @ 312:f5f54b9c3552
treesum: Extensively improved "help patterns":
- glob syntax rules
- examples
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 10 Mar 2025 01:57:25 +0100 |
| parents | 652870b20f9e |
| children | 48430941c18c |
line wrap: on
line source
# -*- coding: utf-8 -*-
# :-
# :Copyright: (c) 2020-2025 Franz Glasner
# :License:   BSD-3-Clause
# :-
r"""File name matching.

"""

from __future__ import print_function, absolute_import


__all__ = ["FnMatcher"]


import re

from . import PY2
from . import glob


# NOTE: The pattern type names used in this text (including the examples)
#       must match the keys of `FnMatcher._registry`.
HELP_DESCRIPTION = r"""
PATTERNs
========

  Filename matching allows several types of patterns. Each pattern
  starts with its type specification.

    glob:      case-sensitive, anchored at the begin and end
    iglob:     case-insensitive variant of "glob"
    re:        regular expression (Python style)
    path:      plain path name (rooted), can be a file or a directory or
               a prefix thereof
    fullpath:  exactly a single full path (file or directory), relative to
               the root of the tree

  The default if no type is given explicitely is "glob:".

  Glob Syntax Rules
  -----------------

    - The `*' character matches zero or more characters of a name
      component without crossing directory boundaries.

    - The `**' characters matches zero or more characters crossing
      directory boundaries.

    - `**/' matches zero or more subdirectories; files do not match.

    - The `?' character matches exactly one character of a name
      component.

    - The backslash character (`\') is used to escape characters that
      would otherwise be interpreted as special characters. The
      expression `\\' matches a single backslash and `\{' matches a
      left brace for example.

    - The `[ ]' characters are a bracket expression that match a single
      character of a name component out of a set of characters. For
      example, `[abc]' matches "a", "b", or "c". The hyphen (`-') may
      be used to specify a range so `[a-z]' specifies a range that
      matches from "a" to "z" (inclusive). These forms can be mixed so
      `[abce-g]' matches "a", "b", "c", "e", "f" or "g". If the
      character after the `[' is a `!' then it is used for negation so
      `[!a-c]' matches any character except "a", "b", or "c".

      Within a bracket expression the `*', `?' and `\' characters match
      themselves. The `-' character matches itself if it is the first
      or last character within the brackets, or the first or last
      character after the `!' if negating.

      Also, the `]' character matches itself if it is the first
      character within the brackets, or the first character after the
      `!' if negating.

    - The curly brace characters `{ }' denote a group of subpatterns,
      where the group matches if any subpattern in the group matches.
      The `,' character is used to separate the subpatterns. Groups
      can be nested.

    - Leading period/dot characters in file name are treated as regular
      characters in match operations. For example, the `*' glob pattern
      matches file name `.login'.

    - All other characters match themselves.

  Examples
  --------

    glob:*.py         any name ending with ".py" in the root directory
    *.py              the same as "glob:*.py" (because "glob:" is the default)
    re:\A[^/]*\.py\Z  the same as "glob:*.py"
    glob:**.py        any name ending with ".py" anywhere
    re:\.py\Z         the same as "glob:**.py"
    glob:dir/*        any name in directory "dir"

    Each of these patterns specify any name below directory "dir":

      glob:dir/**
      re:\Adir/
      path:dir/

    Each of these patterns specify any name in any directory that ends
    with "file":

      glob:**/file
      re:(^|/)file\Z

    These patterns specify a single path:

      fullpath:dir1/dir2/file
      re:\Adir1/dir2/file\Z

"""


def glob_factory(pattern):
    """Return a predicate for the case-sensitive glob `pattern`.

    The glob is translated by `glob.glob_to_regexp()` and wrapped into
    ``\\A`` and ``\\Z``, so the whole input string must match.

    """
    cpat = re.compile(
        # automatically anchored
        "\\A{}\\Z".format(glob.glob_to_regexp(pattern)),
        re.DOTALL)

    def _glob_matcher(s):
        return cpat.search(s) is not None

    return _glob_matcher


def iglob_factory(pattern):
    """Return a predicate for the case-insensitive glob `pattern`.

    Same as :func:`glob_factory` but compiled with `re.IGNORECASE`.

    """
    cpat = re.compile(
        # automatically anchored
        "\\A{}\\Z".format(glob.glob_to_regexp(pattern)),
        re.DOTALL | re.IGNORECASE)

    def _iglob_matcher(s):
        return cpat.search(s) is not None

    return _iglob_matcher


def re_factory(pattern):
    """Return a predicate for the regular expression `pattern`.

    The expression is *searched* for (not anchored automatically); it is
    compiled with `re.DOTALL`.

    """
    cpat = re.compile(pattern, re.DOTALL)

    def _re_matcher(s):
        return cpat.search(s) is not None

    return _re_matcher


def path_factory(pattern):
    """Return a predicate that is true for every string starting with
    `pattern`.

    """

    def _path_matcher(s):
        return s.startswith(pattern)

    return _path_matcher


def fullpath_factory(pattern):
    """Return a predicate that is true only for strings equal to `pattern`.

    """

    def _fullpath_matcher(s):
        return s == pattern

    return _fullpath_matcher


class FnMatcher(object):

    """An ordered list of include/exclude/accept-treesum filename rules.

    Every rule is a tuple `(action, kind, matcher, orig_pattern)` where
    `matcher` is a callable that takes a name and returns a bool.

    """

    # Maps the pattern type prefix (the part before the first ``:``)
    # to the factory that builds the matcher callable.
    _registry = {
        "glob": glob_factory,
        "iglob": iglob_factory,
        "re": re_factory,
        "path": path_factory,
        "fullpath": fullpath_factory,
    }

    def __init__(self, matchers):
        """Store the given list of `(action, kind, matcher, orig_pattern)`
        tuples.

        """
        super(FnMatcher, self).__init__()
        self._matchers = matchers

    @classmethod
    def build_from_commandline_patterns(klass, filter_definitions):
        """Build a matcher from an iterable of `(action, pattern)` pairs.

        `action` is one of "include", "exclude" or "accept-treesum".
        `pattern` is "KIND:PATTERN"; if it contains no ``:`` at all it is
        taken as a "glob" pattern. An empty or `None` `filter_definitions`
        yields a matcher without any rules.

        :raises RuntimeError: if the kind of a pattern is not registered

        """
        matchers = []
        if filter_definitions:
            for action, kpattern in filter_definitions:
                assert action in ("include", "exclude", "accept-treesum")
                kind, sep, pattern = kpattern.partition(':')
                if not sep:
                    # use the default
                    kind = "glob"
                    pattern = kpattern
                factory = klass._registry.get(kind, None)
                if not factory:
                    raise RuntimeError(
                        "unknown pattern kind: {}".format(kind))
                matchers.append((action, kind, factory(pattern), pattern))
        return klass(matchers)

    def shall_visit(self, fn, default=True):
        """Determine whether `fn` shall be visited.

        All rules are evaluated in order and the last matching rule
        wins: "include" yields `True`, "exclude" and "accept-treesum"
        yield `False`. If no rule matches `default` is returned.

        :raises RuntimeError: if a matching rule has an unknown action

        """
        visit = default
        for action, kind, matcher, orig_pattern in self._matchers:
            if matcher(fn):
                if action == "include":
                    visit = True
                elif action in ("exclude", "accept-treesum"):
                    visit = False
                else:
                    raise RuntimeError(
                        "unknown action: {}".format(action))
        return visit

    def shall_accept_treesum(self, fn, default=False):
        """Determine whether an existing treesum shall be accepted for `fn`.

        Yields `True` if any "accept-treesum" rule matches `fn`;
        "include" and "exclude" rules are ignored. If no such rule
        matches `default` is returned.

        :raises RuntimeError: if a rule has an unknown action

        """
        accept = default
        for action, kind, matcher, orig_pattern in self._matchers:
            if action == "accept-treesum":
                if matcher(fn):
                    accept = True
            elif action in ("include", "exclude"):
                pass
            else:
                raise RuntimeError("unknown action: {}".format(action))
        return accept

    def definitions(self):
        """Yield `(action, kind, orig_pattern)` for every rule in order."""
        for action, kind, matcher, orig_pattern in self._matchers:
            yield (action, kind, orig_pattern)

    def __bool__(self):
        """A matcher is true if it has at least one rule."""
        return bool(self._matchers)

    if PY2:
        __nonzero__ = __bool__
