changeset 296:ca293f708cb4

Begin some preparation for handling glob-style patterns in treeview. Needed to implement inclusions and exclusions.
author Franz Glasner <fzglas.hg@dom66.de>
date Sun, 02 Mar 2025 22:54:40 +0100
parents 4a259fb9968e
children 141a3aa0b403
files cutils/util/glob.py setup.cfg tests/test_match.py
diffstat 3 files changed, 192 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cutils/util/glob.py	Sun Mar 02 22:54:40 2025 +0100
@@ -0,0 +1,122 @@
+# -*- coding: utf-8 -*-
+# :-
+# :Copyright: (c) 2020-2025 Franz Glasner
+# :License:   BSD-3-Clause
+# :-
+r"""Glob handling.
+
+.. seealso::
+   - https://docs.oracle.com/javase/8/docs/api/java/nio/file/FileSystem.html#getPathMatcher-java.lang.String-
+   - https://docs.oracle.com/javase/tutorial/essential/io/fileOps.html#glob
+
+The following rules are used to interpret glob patterns:
+
+- The ``*`` character matches zero or more characters of a name
+  component without crossing directory boundaries.
+
+- The ``**`` characters match zero or more characters crossing
+  directory boundaries.
+
+- The ``?`` character matches exactly one character of a name component.
+
+- The backslash character (``\``) is used to escape characters that
+  would otherwise be interpreted as special characters. The expression
+  ``\\`` matches a single backslash and ``\{`` matches a left brace for
+  example.
+
+- The ``[ ]`` characters are a bracket expression that match a single
+  character of a name component out of a set of characters. For example,
+  ``[abc]`` matches "``a``", "``b``", or "``c``". The hyphen (``-``) may
+  be used to specify a range so ``[a-z]`` specifies a range that matches
+  from "``a``" to "``z``" (inclusive). These forms can be mixed so
+  ``[abce-g]`` matches "``a``", "``b``", "``c``", "``e``", "``f``" or
+  "``g``".
+
+  If the character after the ``[`` is a ``!`` then it is used for negation
+  so ``[!a-c]`` matches any character except "``a``", "``b``", or "``c``".
+
+  Within a bracket expression the ``*``, ``?`` and ``\`` characters match
+  themselves.
+
+  The ``-`` character matches itself if it is the first or last character
+  within the brackets, or the first or last character after the ``!`` if
+  negating.
+
+  Also, the ``]`` character matches itself if it is the first character
+  within the brackets, or the first character after the ``!`` if negating.
+
+- The ``{ }`` characters are a group of subpatterns, where the group matches
+  if any subpattern in the group matches.
+
+  The ``,`` character is used to separate the subpatterns. Groups cannot be
+  nested.
+
+- Leading period/dot characters in file name are treated as regular characters
+  in match operations. For example, the ``*`` glob pattern matches file name
+  ``.login``.
+
+- All other characters match themselves.
+
+"""
+
+from __future__ import print_function, absolute_import
+
+
+__all__ = ["glob_to_regexp"]
+
+
+from . import PY2
+
+
+def glob_to_regexp(g):
+    pass  # placeholder: no conversion is implemented in this changeset
+
+
+class CharIter(object):
+
+    """Iterator over byte or unicode strings with peek support.
+
+    On Python3 always yields an octet of :class:`bytes` instead of
+    an :class:`int` if the iterator iterates over :class:`bytes`;
+    :meth:`peek` follows the same rule.
+
+    """
+
+    __slots__ = ("_it", "_nch")
+
+    def __init__(self, w):
+        """Wrap `w`, a byte or unicode string or an existing iterator."""
+        # Strings get a fresh iterator; any other object is used as given
+        strtypes = (bytes, unicode) if PY2 else (bytes, str)  # noqa: F821
+        self._it = iter(w) if isinstance(w, strtypes) else w
+        # Item fetched by peek() but not yet consumed; None if none pending
+        self._nch = None
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        """Return the next item; a peeked item is handed out first."""
+        if self._nch is not None:
+            c = self._nch
+            self._nch = None
+            return c
+        c = next(self._it)
+        # Iterating bytes on Python3 yields ints: wrap them into bytes
+        return bytes((c,)) if isinstance(c, int) else c
+
+    if PY2:
+        next = __next__
+
+    def peek(self):
+        """Peek the next character.
+
+        Return `None` if the iterator is exhausted. The item is converted
+        like in :meth:`__next__`, so peeking bytes yields :class:`bytes`.
+
+        """
+        if self._nch is None:
+            c = next(self._it, None)
+            # Same int -> bytes conversion as in __next__
+            self._nch = bytes((c,)) if isinstance(c, int) else c
+        return self._nch
--- a/setup.cfg	Fri Feb 28 14:11:20 2025 +0100
+++ b/setup.cfg	Sun Mar 02 22:54:40 2025 +0100
@@ -58,3 +58,6 @@
 exclude =
     # Ignore the vendored crcmod2/crcmod sub-package
     cutils/crcmod
+per-file-ignores =
+    # E501: line too long
+    cutils/util/glob.py:E501
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test_match.py	Sun Mar 02 22:54:40 2025 +0100
@@ -0,0 +1,67 @@
+# -*- coding: utf-8 -*-
+r"""Unit tests for :mod:`cutils.util.glob`
+
+"""
+
+from __future__ import absolute_import, print_function
+
+import _test_setup    # noqa: F401 imported but unused
+
+import sys
+import unittest
+
+from cutils.util.glob import CharIter
+
+
+class TestCharIter(unittest.TestCase):
+
+    def test_transitive_iter(self):
+        chariter = CharIter("1234")
+        self.assertIs(iter(chariter), chariter)
+
+    def test_native_str(self):
+        # Joining the yielded items must reproduce the native string
+        source = CharIter("1234")
+        collected = list(source)
+        joined = "".join(collected)
+        self.assertEqual("1234", joined)
+
+    def test_unicode_str(self):
+        # Joining the yielded items must reproduce the unicode string
+        source = CharIter(u"1234")
+        collected = list(source)
+        joined = "".join(collected)
+        self.assertEqual(u"1234", joined)
+
+    def test_byte_str(self):
+        # Items of a byte string must be joinable with a bytes separator
+        source = CharIter(b"1234")
+        collected = list(source)
+        joined = b"".join(collected)
+        self.assertEqual(b"1234", joined)
+
+    def test_peek_exhausted(self):
+        source = CharIter("1234")
+        # Drain the iterator completely before peeking
+        list(source)
+        self.assertIsNone(source.peek())
+
+    def test_peek_first(self):
+        source = CharIter("1234")
+        self.assertEqual("1", source.peek())
+        # Peeking must not consume the first character
+        self.assertEqual("1234", "".join(source))
+        self.assertIsNone(source.peek())
+
+    def test_peek_from_second(self):
+        source = CharIter("1234")
+        self.assertEqual("1", source.peek())
+        self.assertEqual("1", next(source))
+        self.assertEqual("2", source.peek())
+        # The peeked "2" is still part of the remaining characters
+        self.assertEqual("234", "".join(source))
+        self.assertIsNone(source.peek())
+
+
+if __name__ == "__main__":
+    sys.exit(unittest.main())