view cutils/util/glob.py @ 296:ca293f708cb4

Begin some preparation for handling glob-style patterns in treeview. Needed to implement inclusions and exclusions.
author Franz Glasner <fzglas.hg@dom66.de>
date Sun, 02 Mar 2025 22:54:40 +0100
parents
children 141a3aa0b403
line wrap: on
line source

# -*- coding: utf-8 -*-
# :-
# :Copyright: (c) 2020-2025 Franz Glasner
# :License:   BSD-3-Clause
# :-
r"""Glob handling.

.. seealso::
   - https://docs.oracle.com/javase/8/docs/api/java/nio/file/FileSystem.html#getPathMatcher-java.lang.String-
   - https://docs.oracle.com/javase/tutorial/essential/io/fileOps.html#glob

The following rules are used to interpret glob patterns:

- The ``*`` character matches zero or more characters of a name
  component without crossing directory boundaries.

- The ``**`` characters match zero or more characters crossing
  directory boundaries.

- The ``?`` character matches exactly one character of a name component.

- The backslash character (``\``) is used to escape characters that
  would otherwise be interpreted as special characters. The expression
  ``\\`` matches a single backslash and ``\{`` matches a left brace for
  example.

- The ``[ ]`` characters are a bracket expression that match a single
  character of a name component out of a set of characters. For example,
  ``[abc]`` matches "``a``", "``b``", or "``c``". The hyphen (``-``) may
  be used to specify a range so ``[a-z]`` specifies a range that matches
  from "``a``" to "``z``" (inclusive). These forms can be mixed so
  ``[abce-g]`` matches "``a``", "``b``", "``c``", "``e``", "``f``" or
  "``g``".

  If the character after the ``[`` is a ``!`` then it is used for negation
  so ``[!a-c]`` matches any character except "``a``", "``b``", or "``c``".

  Within a bracket expression the ``*``, ``?`` and ``\`` characters match
  themselves.

  The ``-`` character matches itself if it is the first or last character
  within the brackets, or the first or last character after the ``!`` if
  negating.

  Also, the ``]`` character matches itself if it is the first character
  within the brackets, or the first character after the ``!`` if negating.

- The ``{ }`` characters are a group of subpatterns, where the group matches
  if any subpattern in the group matches.

  The ``,`` character is used to separate the subpatterns. Groups cannot be
  nested.

- Leading period/dot characters in file name are treated as regular characters
  in match operations. For example, the ``*`` glob pattern matches file name
  ``.login``.

- All other characters match themselves.

"""

from __future__ import print_function, absolute_import


__all__ = ["glob_to_regexp"]


from . import PY2


def glob_to_regexp(g):
    """Placeholder for the glob-to-regexp translation.

    .. note:: Not implemented yet: the argument `g` is ignored and
              `None` is returned.

    """
    return None


class CharIter(object):

    """Iterator over byte or unicode strings with peek support.

    On Python3 always yields an octet of :class:`bytes` instead of
    :class:`int`s if the iterator iterates over :class:`bytes`.
    This holds for items obtained by :meth:`peek` also.

    """

    __slots__ = ("_it", "_nch")

    def __init__(self, w):
        """Initialize the iterator.

        :param w: A byte string or a unicode string to iterate over;
                  every other object is used as the underlying iterator
                  as is

        """
        # The item that has been peeked but not yet consumed (if any)
        self._nch = None
        if PY2:
            string_types = (bytes, unicode)  # noqa: F821 undefined name
        else:
            string_types = (bytes, str)
        self._it = iter(w) if isinstance(w, string_types) else w

    def __iter__(self):
        return self

    @staticmethod
    def _normalize(c):
        """Convert an :class:`int` item into a :class:`bytes` octet.

        Iterating over :class:`bytes` on Python3 yields :class:`int`s.
        All other items are returned unchanged.

        """
        return bytes((c,)) if isinstance(c, int) else c

    def __next__(self):
        """Return the next character.

        A character that has been peeked before is returned first.

        :raises StopIteration: if the underlying iterator is exhausted

        """
        if self._nch is not None:
            c = self._nch
            self._nch = None
            return c
        return self._normalize(next(self._it))

    if PY2:
        next = __next__

    def peek(self):
        """Peek the next character.

        Return `None` if the iterator is exhausted.

        """
        if self._nch is None:
            c = next(self._it, None)
            if c is not None:
                # Normalize here too: otherwise a peeked item -- and the
                # item returned by the following `next()` -- would be a
                # raw int when iterating over bytes on Python3.
                self._nch = self._normalize(c)
        return self._nch