view cutils/util/fnmatch.py @ 323:48430941c18c

Adopt copyright and license wordings from https://reuse.software/faq/. While there normalize copyright years for every file to start with the file's addition to the project (i.e. with the year of file creation).
author Franz Glasner <fzglas.hg@dom66.de>
date Wed, 26 Mar 2025 18:42:23 +0100
parents f5f54b9c3552
children 54a6d4534ef4
line wrap: on
line source

# -*- coding: utf-8 -*-
# :-
# SPDX-FileCopyrightText: © 2025 Franz Glasner
# SPDX-License-Identifier: BSD-3-Clause
# :-
r"""File name matching.

"""

from __future__ import print_function, absolute_import


# Only the matcher class is exported by a star import; the factory
# functions and HELP_DESCRIPTION must be accessed via the module.
__all__ = ["FnMatcher"]


import re

from . import PY2
from . import glob


# User-facing description of the pattern kinds handled by `FnMatcher`
# and of the glob syntax.  The kind names used here must stay in sync
# with the keys of `FnMatcher._registry`.
# NOTE(review): presumably shown by the command line help -- confirm
# against the callers.
HELP_DESCRIPTION = r"""
PATTERNs
========

Filename matching allows several types of patterns. Each pattern starts
with its type specification.

  glob:
    case-sensitive, anchored at the begin and end

  iglob:
    case-insensitive variant of "glob"

  re:
    regular expression (Python style)

  path:
    plain path name (rooted), can be a file or a directory or a prefix
    thereof

  fullpath:
    exactly a single full path (file or directory), relative to the
    root of the tree

The default if no type is given explicitly is "glob:".


Glob Syntax Rules
-----------------

- The `*' character matches zero or more characters of a name
  component without crossing directory boundaries.

- The `**' characters match zero or more characters crossing
  directory boundaries.

- `**/' matches zero or more subdirectories; files do not match.

- The `?' character matches exactly one character of a name component.

- The backslash character (`\') is used to escape characters that
  would otherwise be interpreted as special characters. The expression
  `\\' matches a single backslash and `\{' matches a left brace for
  example.

- The `[ ]' characters are a bracket expression that matches a single
  character of a name component out of a set of characters. For example,
  `[abc]' matches "a", "b", or "c". The hyphen (`-') may
  be used to specify a range so `[a-z]' specifies a range that matches
  from "a" to "z" (inclusive). These forms can be mixed so
  `[abce-g]' matches "a", "b", "c", "e", "f" or "g".

  If the character after the `[' is a `!' then it is used for negation
  so `[!a-c]' matches any character except "a", "b", or "c".

  Within a bracket expression the `*', `?' and `\' characters match
  themselves.

  The `-' character matches itself if it is the first or last character
  within the brackets, or the first or last character after the `!' if
  negating.

  Also, the `]' character matches itself if it is the first character
  within the brackets, or the first character after the `!' if negating.

- The curly brace characters `{ }' denote a group of subpatterns, where
  the group matches if any subpattern in the group matches.

  The `,' character is used to separate the subpatterns. Groups can be
  nested.

- Leading period/dot characters in file name are treated as regular characters
  in match operations. For example, the `*' glob pattern matches file name
  `.login'.

- All other characters match themselves.


Examples
--------

  glob:*.py
    any name ending with ".py" in the root directory

  *.py
    the same as "glob:*.py" (because "glob:" is the default)

  re:\A[^/]*\.py\Z
    the same as "glob:*.py"

  glob:**.py
    any name ending with ".py" anywhere

  re:\.py\Z
    the same as "glob:**.py"

  glob:dir/*
    any name in directory "dir"

  Each of these patterns specifies any name below directory "dir":

    glob:dir/**

    re:\Adir/

    path:dir/

  Each of these patterns specifies any name in any directory that ends with
  "file":

    glob:**/file

    re:(^|/)file\Z

  These patterns specify a single path:

    fullpath:dir1/dir2/file

    re:\Adir1/dir2/file\Z

"""


def glob_factory(pattern):
    r"""Build a case-sensitive matcher for the glob `pattern`.

    The glob is translated by :func:`glob.glob_to_regexp`, wrapped in
    ``\A`` and ``\Z`` and compiled with :data:`re.DOTALL`.

    :param pattern: the glob pattern (without the ``glob:`` prefix)
    :return: a callable taking a string and returning whether it matches

    """
    # globs are implicitly anchored at both ends
    anchored = "\\A{}\\Z".format(glob.glob_to_regexp(pattern))
    regex = re.compile(anchored, re.DOTALL)

    def _glob_matcher(s):
        # a match object is always truthy, `None` never is
        return bool(regex.search(s))

    return _glob_matcher


def iglob_factory(pattern):
    r"""Build a case-insensitive matcher for the glob `pattern`.

    Works like the case-sensitive glob matcher, but the translated and
    ``\A``/``\Z``-anchored expression is additionally compiled with
    :data:`re.IGNORECASE`.

    :param pattern: the glob pattern (without the ``iglob:`` prefix)
    :return: a callable taking a string and returning whether it matches

    """
    flags = re.DOTALL | re.IGNORECASE
    # globs are implicitly anchored at both ends
    anchored = "\\A{}\\Z".format(glob.glob_to_regexp(pattern))
    regex = re.compile(anchored, flags)

    def _iglob_matcher(s):
        # a match object is always truthy, `None` never is
        return bool(regex.search(s))

    return _iglob_matcher


def re_factory(pattern):
    """Build a matcher from the regular expression `pattern`.

    The expression is compiled with :data:`re.DOTALL` and applied with
    `search`, so it is not anchored unless the pattern itself says so.

    :param pattern: the regular expression (without the ``re:`` prefix)
    :return: a callable taking a string and returning whether it matches

    """
    regex = re.compile(pattern, re.DOTALL)

    def _re_matcher(s):
        found = regex.search(s)
        if found is None:
            return False
        return True

    return _re_matcher


def path_factory(pattern):
    """Build a matcher that accepts every string starting with `pattern`.

    :param pattern: the path prefix (without the ``path:`` prefix)
    :return: a callable taking a string and returning whether it
             starts with `pattern`

    """
    prefix = pattern

    def _path_matcher(s):
        # plain prefix comparison: no normalization, no wildcard handling
        has_prefix = s.startswith(prefix)
        return has_prefix

    return _path_matcher


def fullpath_factory(pattern):
    """Build a matcher that accepts exactly the string `pattern`.

    :param pattern: the full path (without the ``fullpath:`` prefix)
    :return: a callable taking a string and returning whether it is
             equal to `pattern`

    """
    expected = pattern

    def _fullpath_matcher(s):
        # exact comparison only: no prefix or wildcard handling
        is_same = s == expected
        return is_same

    return _fullpath_matcher


class FnMatcher(object):
    """An ordered list of file name rules.

    Every rule is a tuple `(action, kind, matcher, pattern)` where
    `action` is one of ``include``, ``exclude`` or ``accept-treesum``,
    `kind` is a key of :attr:`_registry`, `matcher` is the callable
    built by the corresponding factory and `pattern` is the pattern
    text without its kind prefix.

    An instance is truthy if it holds at least one rule.

    """

    # Maps a pattern kind (the text before the first ":") to the factory
    # that builds the matcher callable for it.
    _registry = {
        "glob": glob_factory,
        "iglob": iglob_factory,
        "re": re_factory,
        "path": path_factory,
        "fullpath": fullpath_factory,
    }

    def __init__(self, matchers):
        """Store the already built rules.

        :param matchers: a sequence of `(action, kind, matcher, pattern)`
                         tuples; it is stored as is and not copied

        """
        super(FnMatcher, self).__init__()
        self._matchers = matchers

    @classmethod
    def build_from_commandline_patterns(klass, filter_definitions):
        """Build a matcher from `(action, pattern)` pairs.

        A pattern has the form ``KIND:PATTERN``. If it contains no ``:``
        the complete text is taken as a ``glob`` pattern.

        :param filter_definitions: an iterable of `(action, kpattern)`
                                   pairs; may be `None` or empty
        :return: a new instance holding the rules in the given order
        :raises RuntimeError: if an action or a pattern kind is unknown

        """
        matchers = []
        if filter_definitions:
            for action, kpattern in filter_definitions:
                # Explicit check instead of an `assert`: assertions are
                # removed when running with -O and an invalid action
                # would be stored silently.
                if action not in ("include", "exclude", "accept-treesum"):
                    raise RuntimeError("unknown action: {}".format(action))
                kind, sep, pattern = kpattern.partition(':')
                if not sep:
                    # use the default
                    kind = "glob"
                    pattern = kpattern
                factory = klass._registry.get(kind, None)
                if not factory:
                    raise RuntimeError("unknown pattern kind: {}".format(kind))
                matchers.append((action, kind, factory(pattern), pattern))
        return klass(matchers)

    def shall_visit(self, fn, default=True):
        """Determine whether `fn` is to be visited.

        All rules are evaluated in order and the last rule that matches
        `fn` decides: ``include`` yields `True`, ``exclude`` and
        ``accept-treesum`` yield `False`.

        :param fn: the name to check
        :param default: the result if no rule matches
        :raises RuntimeError: if a matching rule has an unknown action

        """
        visit = default
        for action, kind, matcher, orig_pattern in self._matchers:
            if matcher(fn):
                if action == "include":
                    visit = True
                elif action in ("exclude", "accept-treesum"):
                    visit = False
                else:
                    raise RuntimeError("unknown action: {}".format(action))
        return visit

    def shall_accept_treesum(self, fn, default=False):
        """Determine whether an ``accept-treesum`` rule matches `fn`.

        Only ``accept-treesum`` rules are evaluated; ``include`` and
        ``exclude`` rules are skipped.

        :param fn: the name to check
        :param default: the result if no ``accept-treesum`` rule matches
        :return: `True` if at least one ``accept-treesum`` rule matches,
                 `default` otherwise
        :raises RuntimeError: if any rule has an unknown action

        """
        accept = default
        for action, kind, matcher, orig_pattern in self._matchers:
            if action == "accept-treesum":
                if matcher(fn):
                    accept = True
            elif action in ("include", "exclude"):
                pass
            else:
                raise RuntimeError("unknown action: {}".format(action))
        return accept

    def definitions(self):
        """Yield `(action, kind, pattern)` for every rule in order."""
        for action, kind, matcher, orig_pattern in self._matchers:
            yield (action, kind, orig_pattern)

    def __bool__(self):
        """Return whether at least one rule is present."""
        return bool(self._matchers)

    if PY2:
        # Python 2 spells the truth value hook `__nonzero__`
        __nonzero__ = __bool__